From 4e34cacdb924367cf592c855899d1305b5c19c5f Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:13:45 +0800 Subject: [PATCH 01/23] update English tutorial --- .github/workflows/sphinx_docs.yml | 8 +- .gitignore | 2 +- .pre-commit-config.yaml | 1 + docs/sphinx_doc/Makefile | 32 - docs/sphinx_doc/assets/redirect.html | 12 - docs/sphinx_doc/en/source/_static/custom.css | 4 - .../source/_templates/language_selector.html | 5 - .../en/source/_templates/layout.html | 3 - docs/sphinx_doc/en/source/conf.py | 93 --- docs/sphinx_doc/en/source/index.rst | 63 -- .../en/source/tutorial/101-agentscope.md | 116 ---- .../en/source/tutorial/102-installation.md | 89 --- .../en/source/tutorial/103-example.md | 108 --- .../en/source/tutorial/104-usecase.md | 302 -------- .../en/source/tutorial/105-logging.md | 65 -- .../en/source/tutorial/201-agent.md | 189 ----- .../en/source/tutorial/202-pipeline.md | 301 -------- .../en/source/tutorial/203-model.md | 635 ----------------- .../en/source/tutorial/203-parser.md | 530 -------------- .../en/source/tutorial/203-stream.md | 123 ---- .../en/source/tutorial/204-service.md | 334 --------- .../en/source/tutorial/205-memory.md | 223 ------ .../en/source/tutorial/206-prompt.md | 554 --------------- .../en/source/tutorial/207-monitor.md | 128 ---- .../en/source/tutorial/208-distribute.md | 469 ------------- docs/sphinx_doc/en/source/tutorial/209-gui.md | 210 ------ .../en/source/tutorial/209-prompt_opt.md | 440 ------------ docs/sphinx_doc/en/source/tutorial/211-web.md | 90 --- .../en/source/tutorial/301-community.md | 30 - .../en/source/tutorial/302-contribute.md | 70 -- .../en/source/tutorial/contribute.rst | 8 - docs/sphinx_doc/en/source/tutorial/main.md | 35 - docs/sphinx_doc/requirements.txt | 11 - docs/sphinx_doc/template/module.rst_t | 5 - docs/sphinx_doc/template/package.rst_t | 10 - .../zh_CN/source/_static/custom.css | 4 - .../source/_templates/language_selector.html | 5 - .../zh_CN/source/_templates/layout.html | 3 - 
docs/sphinx_doc/zh_CN/source/conf.py | 87 --- docs/sphinx_doc/zh_CN/source/index.rst | 64 -- .../zh_CN/source/tutorial/101-agentscope.md | 93 --- .../zh_CN/source/tutorial/102-installation.md | 92 --- .../zh_CN/source/tutorial/103-example.md | 104 --- .../zh_CN/source/tutorial/104-usecase.md | 305 -------- .../zh_CN/source/tutorial/105-logging.md | 95 --- .../zh_CN/source/tutorial/201-agent.md | 191 ----- .../zh_CN/source/tutorial/202-pipeline.md | 302 -------- .../zh_CN/source/tutorial/203-model.md | 650 ------------------ .../zh_CN/source/tutorial/203-parser.md | 534 -------------- .../zh_CN/source/tutorial/203-stream.md | 121 ---- .../zh_CN/source/tutorial/204-service.md | 312 --------- .../zh_CN/source/tutorial/205-memory.md | 214 ------ .../zh_CN/source/tutorial/206-prompt.md | 488 ------------- .../zh_CN/source/tutorial/207-monitor.md | 126 ---- .../zh_CN/source/tutorial/208-distribute.md | 476 ------------- .../zh_CN/source/tutorial/209-gui.md | 206 ------ .../zh_CN/source/tutorial/209-prompt_opt.md | 419 ----------- .../zh_CN/source/tutorial/210-rag.md | 287 -------- .../zh_CN/source/tutorial/211-web.md | 84 --- .../zh_CN/source/tutorial/301-community.md | 30 - .../zh_CN/source/tutorial/302-contribute.md | 70 -- .../zh_CN/source/tutorial/contribute.rst | 8 - docs/sphinx_doc/zh_CN/source/tutorial/main.md | 35 - .../build_sphinx_doc.sh => tutorial/build.sh} | 2 +- docs/tutorial/en/Makefile | 20 + docs/tutorial/en/build.sh | 3 + docs/tutorial/en/make.bat | 35 + .../en/source/_static/css/gallery.css | 83 +++ docs/tutorial/en/source/conf.py | 54 ++ docs/tutorial/en/source/index.rst | 48 ++ docs/tutorial/en/source/tutorial/README.md | 0 docs/tutorial/en/source/tutorial/agent.py | 179 +++++ .../en/source/tutorial/builtin_agent.py | 226 ++++++ .../en/source/tutorial/conversation.py | 134 ++++ .../en/source/tutorial/distribution.py | 235 +++++++ docs/tutorial/en/source/tutorial/examples.py | 178 +++++ docs/tutorial/en/source/tutorial/faq.md | 49 ++ 
docs/tutorial/en/source/tutorial/low_code.py | 121 ++++ docs/tutorial/en/source/tutorial/message.py | 57 ++ docs/tutorial/en/source/tutorial/model.py | 341 +++++++++ docs/tutorial/en/source/tutorial/monitor.py | 73 ++ .../en/source/tutorial/multimodality.py | 78 +++ docs/tutorial/en/source/tutorial/prompt.py | 138 ++++ .../en/source/tutorial/prompt_optimization.py | 122 ++++ .../tutorial/en/source/tutorial/quickstart.py | 67 ++ .../en/source/tutorial/rag.md} | 171 +++-- docs/tutorial/en/source/tutorial/streaming.py | 139 ++++ .../en/source/tutorial/structured_output.py | 250 +++++++ docs/tutorial/en/source/tutorial/tool.py | 115 ++++ docs/tutorial/en/source/tutorial/visual.py | 229 ++++++ .../en/source/tutorial/web_browser.py | 11 + 91 files changed, 3073 insertions(+), 10788 deletions(-) delete mode 100644 docs/sphinx_doc/Makefile delete mode 100644 docs/sphinx_doc/assets/redirect.html delete mode 100644 docs/sphinx_doc/en/source/_static/custom.css delete mode 100644 docs/sphinx_doc/en/source/_templates/language_selector.html delete mode 100644 docs/sphinx_doc/en/source/_templates/layout.html delete mode 100644 docs/sphinx_doc/en/source/conf.py delete mode 100644 docs/sphinx_doc/en/source/index.rst delete mode 100644 docs/sphinx_doc/en/source/tutorial/101-agentscope.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/102-installation.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/103-example.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/104-usecase.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/105-logging.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/201-agent.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/202-pipeline.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/203-model.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/203-parser.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/203-stream.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/204-service.md delete mode 100644 
docs/sphinx_doc/en/source/tutorial/205-memory.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/206-prompt.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/207-monitor.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/208-distribute.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/209-gui.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/209-prompt_opt.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/211-web.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/301-community.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/302-contribute.md delete mode 100644 docs/sphinx_doc/en/source/tutorial/contribute.rst delete mode 100644 docs/sphinx_doc/en/source/tutorial/main.md delete mode 100644 docs/sphinx_doc/requirements.txt delete mode 100644 docs/sphinx_doc/template/module.rst_t delete mode 100644 docs/sphinx_doc/template/package.rst_t delete mode 100644 docs/sphinx_doc/zh_CN/source/_static/custom.css delete mode 100644 docs/sphinx_doc/zh_CN/source/_templates/language_selector.html delete mode 100644 docs/sphinx_doc/zh_CN/source/_templates/layout.html delete mode 100644 docs/sphinx_doc/zh_CN/source/conf.py delete mode 100644 docs/sphinx_doc/zh_CN/source/index.rst delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/101-agentscope.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/102-installation.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/103-example.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/104-usecase.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/201-agent.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/202-pipeline.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/203-model.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/203-parser.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/203-stream.md delete mode 100644 
docs/sphinx_doc/zh_CN/source/tutorial/204-service.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/205-memory.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/206-prompt.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/207-monitor.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/208-distribute.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/209-gui.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/209-prompt_opt.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/210-rag.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/211-web.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/301-community.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/302-contribute.md delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/contribute.rst delete mode 100644 docs/sphinx_doc/zh_CN/source/tutorial/main.md rename docs/{sphinx_doc/build_sphinx_doc.sh => tutorial/build.sh} (90%) create mode 100644 docs/tutorial/en/Makefile create mode 100644 docs/tutorial/en/build.sh create mode 100644 docs/tutorial/en/make.bat create mode 100644 docs/tutorial/en/source/_static/css/gallery.css create mode 100644 docs/tutorial/en/source/conf.py create mode 100644 docs/tutorial/en/source/index.rst create mode 100644 docs/tutorial/en/source/tutorial/README.md create mode 100644 docs/tutorial/en/source/tutorial/agent.py create mode 100644 docs/tutorial/en/source/tutorial/builtin_agent.py create mode 100644 docs/tutorial/en/source/tutorial/conversation.py create mode 100644 docs/tutorial/en/source/tutorial/distribution.py create mode 100644 docs/tutorial/en/source/tutorial/examples.py create mode 100644 docs/tutorial/en/source/tutorial/faq.md create mode 100644 docs/tutorial/en/source/tutorial/low_code.py create mode 100644 docs/tutorial/en/source/tutorial/message.py create mode 100644 docs/tutorial/en/source/tutorial/model.py create mode 100644 docs/tutorial/en/source/tutorial/monitor.py create mode 
100644 docs/tutorial/en/source/tutorial/multimodality.py create mode 100644 docs/tutorial/en/source/tutorial/prompt.py create mode 100644 docs/tutorial/en/source/tutorial/prompt_optimization.py create mode 100644 docs/tutorial/en/source/tutorial/quickstart.py rename docs/{sphinx_doc/en/source/tutorial/210-rag.md => tutorial/en/source/tutorial/rag.md} (76%) create mode 100644 docs/tutorial/en/source/tutorial/streaming.py create mode 100644 docs/tutorial/en/source/tutorial/structured_output.py create mode 100644 docs/tutorial/en/source/tutorial/tool.py create mode 100644 docs/tutorial/en/source/tutorial/visual.py create mode 100644 docs/tutorial/en/source/tutorial/web_browser.py diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 416d0a08d..60fe14ed5 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -35,16 +35,16 @@ jobs: - id: build name: Build Documentation run: | - cd docs/sphinx_doc - ./build_sphinx_doc.sh + cd docs/tutorial/en/ + ./build.sh - name: Upload Documentation uses: actions/upload-artifact@v4 with: name: SphinxDoc - path: 'docs/sphinx_doc/build' + path: 'docs/tutorial/en/build' - uses: peaceiris/actions-gh-pages@v3 if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: 'docs/sphinx_doc/build/html' + publish_dir: 'docs/tutorial/build/html' cname: doc.agentscope.io \ No newline at end of file diff --git a/.gitignore b/.gitignore index db8d4d7f4..f59dd25fe 100644 --- a/.gitignore +++ b/.gitignore @@ -134,7 +134,7 @@ dmypy.json .DS_Store # docs -docs/sphinx_doc/build/ +docs/tutorial/en/build/ # Used to save loggings and files *runs/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e65b4ae1f..b1891d5c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,6 +57,7 @@ repos: hooks: - id: flake8 args: ["--extend-ignore=E203"] + exclude: ^docs - repo: 
https://github.com/pylint-dev/pylint rev: v3.0.2 hooks: diff --git a/docs/sphinx_doc/Makefile b/docs/sphinx_doc/Makefile deleted file mode 100644 index a90845ebf..000000000 --- a/docs/sphinx_doc/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# Makefile - -SPHINXBUILD = sphinx-build -SPHINXPROJ = AgentScope-Doc -ASSETSDIR = assets -BUILDDIR = build/html -SOURCEDIR_EN = en/source -BUILDDIR_EN = build/html/en -SOURCEDIR_ZH = zh_CN/source -BUILDDIR_ZH = build/html/zh_CN - -# English document -en: - @$(SPHINXBUILD) -b html "$(SOURCEDIR_EN)" "$(BUILDDIR_EN)" - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR_EN)" - -# Chinese document -zh_CN: - @$(SPHINXBUILD) -b html "$(SOURCEDIR_ZH)" "$(BUILDDIR_ZH)" - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR_ZH)" - -index: - @cp "$(ASSETSDIR)/redirect.html" "$(BUILDDIR)/index.html" - -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR_EN)" "$(BUILDDIR_EN)" $(O) - -all: en zh_CN index - -.PHONY: all en zh_CN index \ No newline at end of file diff --git a/docs/sphinx_doc/assets/redirect.html b/docs/sphinx_doc/assets/redirect.html deleted file mode 100644 index 1b7980de8..000000000 --- a/docs/sphinx_doc/assets/redirect.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - AgentScope Documentation - - -

Redirecting to English documentation...

-

If you are not redirected, click here.

- - diff --git a/docs/sphinx_doc/en/source/_static/custom.css b/docs/sphinx_doc/en/source/_static/custom.css deleted file mode 100644 index 68f11ceed..000000000 --- a/docs/sphinx_doc/en/source/_static/custom.css +++ /dev/null @@ -1,4 +0,0 @@ -.language-selector a { - color: white; - width: 20px; -} \ No newline at end of file diff --git a/docs/sphinx_doc/en/source/_templates/language_selector.html b/docs/sphinx_doc/en/source/_templates/language_selector.html deleted file mode 100644 index a8aca93e0..000000000 --- a/docs/sphinx_doc/en/source/_templates/language_selector.html +++ /dev/null @@ -1,5 +0,0 @@ - -
- English | - 中文 -
diff --git a/docs/sphinx_doc/en/source/_templates/layout.html b/docs/sphinx_doc/en/source/_templates/layout.html deleted file mode 100644 index 1d182d309..000000000 --- a/docs/sphinx_doc/en/source/_templates/layout.html +++ /dev/null @@ -1,3 +0,0 @@ - -{% extends "!layout.html" %} {% block sidebartitle %} {{ super() }} {% include -"language_selector.html" %} {% endblock %} diff --git a/docs/sphinx_doc/en/source/conf.py b/docs/sphinx_doc/en/source/conf.py deleted file mode 100644 index 788bda020..000000000 --- a/docs/sphinx_doc/en/source/conf.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys - -# sys.path.insert(0, os.path.abspath("../../../src/agentscope")) - - -# -- Project information ----------------------------------------------------- - -language = "en" - -project = "AgentScope" -copyright = "2024, Alibaba Tongyi Lab" -author = "SysML team of Alibaba Tongyi Lab" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.viewcode", - "sphinx.ext.napoleon", - "sphinxcontrib.mermaid", - "myst_parser", - "sphinx.ext.autosectionlabel", -] - -# Prefix document path to section labels, otherwise autogenerated labels would -# look like 'heading' rather than 'path/to/file:heading' -autosectionlabel_prefix_document = True -autosummary_generate = True -autosummary_ignore_module_all = False - -autodoc_member_order = "bysource" - -autodoc_default_options = { - "members": True, - "special-members": "__init__", -} - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -autodoc_default_options = { - "members": True, - "special-members": "__init__", -} - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -html_theme_options = { - "navigation_depth": 2, -} - -source_suffix = { - ".rst": "restructuredtext", - ".md": "markdown", -} - -html_css_files = [ - "custom.css", -] diff --git a/docs/sphinx_doc/en/source/index.rst b/docs/sphinx_doc/en/source/index.rst deleted file mode 100644 index 7eeb888f3..000000000 --- a/docs/sphinx_doc/en/source/index.rst +++ /dev/null @@ -1,63 +0,0 @@ -.. AgentScope documentation master file, created by - sphinx-quickstart on Fri Jan 5 17:53:54 2024. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -:github_url: https://github.com/modelscope/agentscope - -AgentScope Documentation -====================================== - - -.. include:: tutorial/main.md - :parser: myst_parser.sphinx_ - -.. toctree:: - :maxdepth: 1 - :glob: - :hidden: - :caption: AgentScope Tutorial - - tutorial/101-agentscope.md - tutorial/102-installation.md - tutorial/103-example.md - - tutorial/203-model.md - tutorial/203-stream.md - tutorial/206-prompt.md - tutorial/201-agent.md - tutorial/205-memory.md - tutorial/203-parser.md - tutorial/209-prompt_opt.md - tutorial/204-service.md - tutorial/202-pipeline.md - tutorial/208-distribute.md - tutorial/209-gui.md - tutorial/210-rag.md - tutorial/211-web.md - tutorial/105-logging.md - tutorial/207-monitor.md - tutorial/104-usecase.md - - tutorial/contribute.rst - - -.. toctree:: - :maxdepth: 1 - :glob: - :caption: AgentScope API Reference - - agentscope - agentscope.message - agentscope.models - agentscope.agents - agentscope.memory - agentscope.parsers - agentscope.exception - agentscope.pipelines - agentscope.service - agentscope.rpc - agentscope.server - agentscope.web - agentscope.prompt - agentscope.utils \ No newline at end of file diff --git a/docs/sphinx_doc/en/source/tutorial/101-agentscope.md b/docs/sphinx_doc/en/source/tutorial/101-agentscope.md deleted file mode 100644 index ed05658ea..000000000 --- a/docs/sphinx_doc/en/source/tutorial/101-agentscope.md +++ /dev/null @@ -1,116 +0,0 @@ -(101-agentscope-en)= - -# About AgentScope - -In this tutorial, we will provide an overview of AgentScope by answering -several questions, including what's AgentScope, what can AgentScope provide, -and why we should choose AgentScope. Let's get started! - -## What is AgentScope? - -AgentScope is a developer-centric multi-agent platform, which enables -developers to build their LLM-empowered multi-agent applications with less -effort. 
- -With the advance of large language models, developers are able to build -diverse applications. -In order to connect LLMs to data and services and solve complex tasks, -AgentScope provides a series of development tools and components for ease of -development. -It features - -- **usability**, -- **robustness**, -- **the support of multi-modal data**, -- **distributed deployment**. - -## Key Concepts - -### Message - -Message is a carrier of information (e.g. instructions, multi-modal -data, and dialogue). In AgentScope, message is a Python dict subclass -with `name` and `content` as necessary fields, and `url` as an optional -field referring to additional resources. - -### Agent - -Agent is an autonomous entity capable of interacting with environment and -agents, and taking actions to change the environment. In AgentScope, an -agent takes message as input and generates corresponding response message. - -### Service - -Service refers to the functional APIs that enable agents to perform -specific tasks. In AgentScope, services are categorized into model API -services, which are channels to use the LLMs, and general API services, -which provide a variety of tool functions. - -### Workflow - -Workflow represents ordered sequences of agent executions and message -exchanges between agents, analogous to computational graphs in TensorFlow, -but with the flexibility to accommodate non-DAG structures. - -## Why AgentScope? - -**Exceptional usability for developers.** -AgentScope provides high usability for developers with flexible syntactic -sugars, ready-to-use components, and pre-built examples. - -**Robust fault tolerance for diverse models and APIs.** -AgentScope ensures robust fault tolerance for diverse models, APIs, and -allows developers to build customized fault-tolerant strategies. 
- -**Extensive compatibility for multi-modal application.** -AgentScope supports multi-modal data (e.g., files, images, audio and videos) -in both dialog presentation, message transmission and data storage. - -**Optimized efficiency for distributed multi-agent operations.** AgentScope -introduces an actor-based distributed mechanism that enables centralized -programming of complex distributed workflows, and automatic parallel -optimization. - -## How is AgentScope designed? - -The architecture of AgentScope comprises three hierarchical layers. The -layers provide supports for multi-agent applications from different levels, -including elementary and advanced functionalities of a single agent -(**utility layer**), resources and runtime management (**manager and wrapper -layer**), and agent-level to workflow-level programming interfaces (**agent -layer**). AgentScope introduces intuitive abstractions designed to fulfill -the diverse functionalities inherent to each layer and simplify the -complicated interlayer dependencies when building multi-agent systems. -Furthermore, we offer programming interfaces and default mechanisms to -strengthen the resilience of multi-agent systems against faults within -different layers. - -## AgentScope Code Structure - -```bash -AgentScope -├── src -│ ├── agentscope -│ | ├── agents # Core components and implementations pertaining to agents. -│ | ├── memory # Structures for agent memory. -│ | ├── models # Interfaces for integrating diverse model APIs. -│ | ├── pipelines # Fundamental components and implementations for running pipelines. -│ | ├── rpc # Rpc module for agent distributed deployment. -│ | ├── service # Services offering functions independent of memory and state. -| | ├── web # WebUI used to show dialogs. -│ | ├── utils # Auxiliary utilities and helper functions. -│ | ├── message.py # Definitions and implementations of messaging between agents. -│ | ├── prompt.py # Prompt engineering module for model input. -│ | ├── ... .. 
-│ | ├── ... .. -├── scripts # Scripts for launching local Model API -├── examples # Pre-built examples of different applications. -├── docs # Documentation tool for API reference. -├── tests # Unittest modules for continuous integration. -├── LICENSE # The official licensing agreement for AgentScope usage. -└── setup.py # Setup script for installing. -├── ... .. -└── ... .. -``` - -[[Return to the top]](#101-agentscope) diff --git a/docs/sphinx_doc/en/source/tutorial/102-installation.md b/docs/sphinx_doc/en/source/tutorial/102-installation.md deleted file mode 100644 index a39b1113c..000000000 --- a/docs/sphinx_doc/en/source/tutorial/102-installation.md +++ /dev/null @@ -1,89 +0,0 @@ -(102-installation-en)= - -# Installation - -To install AgentScope, you need to have Python 3.9 or higher installed. We recommend setting up a new virtual environment specifically for AgentScope: - -## Create a Virtual Environment - -### Using Conda - -If you're using Conda as your package and environment management tool, you can create a new virtual environment with Python 3.9 using the following commands: - -```bash -# Create a new virtual environment named 'agentscope' with Python 3.9 -conda create -n agentscope python=3.9 - -# Activate the virtual environment -conda activate agentscope -``` - -### Using Virtualenv - -Alternatively, if you prefer `virtualenv`, you can install it first (if it's not already installed) and then create a new virtual environment as shown: - -```bash -# Install virtualenv if it is not already installed -pip install virtualenv - -# Create a new virtual environment named 'agentscope' with Python 3.9 -virtualenv agentscope --python=python3.9 - -# Activate the virtual environment -source agentscope/bin/activate # On Windows use `agentscope\Scripts\activate` -``` - -## Installing AgentScope - -### Install with Pip - -If you prefer to install AgentScope from Pypi, you can do so easily using `pip`: - -```bash -# For centralized multi-agent applications -pip 
install agentscope -``` - -### Install from Source - -For users who prefer to install AgentScope directly from the source code, follow these steps to clone the repository and install the platform in editable mode: - -**_Note: This project is under active development, it's recommended to install AgentScope from source._** - -```bash -# Pull the source code from Github -git clone https://github.com/modelscope/agentscope.git -cd agentscope - -# For centralized multi-agent applications -pip install -e . -``` - -### Extra Dependencies - -The supported optional dependencies for AgentScope are list as follows: - -- ollama: Ollama API -- litellm: Litellm API -- zhipuai: Zhipuai API -- gemini: Gemini API -- service: The dependencies for different service functions -- distribute: The dependencies for distribution mode -- full: All the dependencies - -You can install one or more of these dependencies by adding them to the installation command. - -#### Windows -```bash -pip install agentscope[gemini] -# or -pip install agentscope[ollama,distribute] -``` -#### Mac & Linux -```bash -pip install agentscope\[gemini\] -# or -pip install agentscope\[ollama,distribute\] -``` - -[[Return to the top]](#102-installation-en) diff --git a/docs/sphinx_doc/en/source/tutorial/103-example.md b/docs/sphinx_doc/en/source/tutorial/103-example.md deleted file mode 100644 index 563d072d9..000000000 --- a/docs/sphinx_doc/en/source/tutorial/103-example.md +++ /dev/null @@ -1,108 +0,0 @@ -(103-start-en)= - -# Quick Start - -AgentScope is designed with a flexible communication mechanism. -In this tutorial, we will introduce the basic usage of AgentScope via a -simple standalone conversation between two agents (e.g. user and assistant -agents). - -## Step1: Prepare Model - -AgentScope decouples the deployment and invocation of models to better build multi-agent applications. 
- -In terms of model deployment, users can use third-party model services such -as OpenAI API, Google Gemini API, HuggingFace/ModelScope Inference API, or -quickly deploy local open-source model services through the [scripts](https://github.com/modelscope/agentscope/blob/main/scripts/README.md) in -the repository. - -While for model invocation, users should prepare a model configuration to specify the model service. Taking OpenAI Chat API as an example, the model configuration is like this: - -```python -model_config = { - "config_name": "{config_name}", # A unique name for the model config. - "model_type": "openai_chat", # Choose from "openai_chat", "openai_dall_e", or "openai_embedding". - - "model_name": "{model_name}", # The model identifier used in the OpenAI API, such as "gpt-3.5-turbo", "gpt-4", or "text-embedding-ada-002". - "api_key": "xxx", # Your OpenAI API key. If unset, the environment variable OPENAI_API_KEY is used. - "organization": "xxx", # Your OpenAI organization ID. If unset, the environment variable OPENAI_ORGANIZATION is used. -} -``` - -More details about model invocation, deployment and open-source models please refer to [Model](203-model-en) section. - -After preparing the model configuration, you can register your configuration by calling the `init` method of AgentScope. Additionally, you can load multiple model configurations at once. - -```python -import agentscope - -# init once by passing a list of config dict -openai_cfg_dict = { - # ... -} -modelscope_cfg_dict = { - # ... -} -agentscope.init(model_configs=[openai_cfg_dict, modelscope_cfg_dict]) -``` - -## Step2: Create Agents - -Creating agents is straightforward in AgentScope. After initializing AgentScope with your model configurations (Step 1 above), you can then define each agent with its corresponding role and specific model. 
- -```python -import agentscope -from agentscope.agents import DialogAgent, UserAgent - -# read model configs -agentscope.init(model_configs="./openai_model_configs.json") - -# Create a dialog agent and a user agent -dialogAgent = DialogAgent(name="assistant", model_config_name="gpt-4", sys_prompt="You are a helpful ai assistant") -userAgent = UserAgent() -``` - -**NOTE**: Please refer to [Customizing Your Own Agent](201-agent-en) for all available agents. - -## Step3: Agent Conversation - -"Message" is the primary means of communication between agents in AgentScope. They are Python dictionaries comprising essential fields like the actual `content` of this message and the sender's `name`. Optionally, a message can include a `url` to either a local file (image, video or audio) or website. - -```python -from agentscope.message import Msg - -# Example of a simple text message from Alice -message_from_alice = Msg("Alice", "Hi!") - -# Example of a message from Bob with an attached image -message_from_bob = Msg("Bob", "What about this picture I took?", url="/path/to/picture.jpg") -``` - -To start a conversation between two agents, such as `dialog_agent` and `user_agent`, you can use the following loop. The conversation continues until the user inputs `"exit"` which terminates the interaction. - -```python -x = None -while True: - x = dialogAgent(x) - x = userAgent(x) - - # Terminate the conversation if the user types "exit" - if x.content == "exit": - print("Exiting the conversation.") - break -``` - -For a more advanced approach, AgentScope offers the option of using pipelines to manage the flow of messages between agents. The `sequentialpipeline` stands for sequential speech, where each agent receive message from last agent and generate its response accordingly. 
- -```python -from agentscope.pipelines.functional import sequentialpipeline - -# Execute the conversation loop within a pipeline structure -x = None -while x is None or x.content != "exit": - x = sequentialpipeline([dialog_agent, user_agent]) -``` - -For more details about how to utilize pipelines for complex agent interactions, please refer to [Pipeline and MsgHub](202-pipeline-en). - -[[Return to the top]](#103-start-en) diff --git a/docs/sphinx_doc/en/source/tutorial/104-usecase.md b/docs/sphinx_doc/en/source/tutorial/104-usecase.md deleted file mode 100644 index 60a0ed02e..000000000 --- a/docs/sphinx_doc/en/source/tutorial/104-usecase.md +++ /dev/null @@ -1,302 +0,0 @@ -(104-usecase-en)= - -# Example: Werewolf Game - -img - -**Werewolf** is a well-known social-deduction game, that involves an imaginary village where a few villagers are secretly werewolves, and the objective is to identify who they are before they eliminate all other players. It's a good use case to demonstrate the interaction between multiple autonomous agents, each with its own objectives and the need for communication. - -Let the adventure begin to unlock the potential of multi-agent applications with AgentScope! - -## Getting Started - -Firstly, ensure that you have installed and configured AgentScope properly. Besides, we will involve the basic concepts of `Model API`, `Agent`, `Msg`, and `Pipeline,` as described in [Tutorial-Concept](101-agentscope.md). - -**Note**: all the configurations and code for this tutorial can be found in `examples/game_werewolf`. - -### Step 1: Prepare Model API and Set Model Configs - -As we discussed in the last tutorial, you need to prepare your model configurations into a JSON file for standard OpenAI chat API, FastChat, and vllm. More details and advanced usages such as configuring local models with POST API are presented in [Tutorial-Model-API](203-model.md). 
- -```json -[ - { - "config_name": "gpt-4-temperature-0.0", - "model_type": "openai_chat", - "model_name": "gpt-4", - "api_key": "xxx", - "organization": "xxx", - "generate_args": { - "temperature": 0.0 - } - } -] -``` - -### Step 2: Define the Roles of Each Agent - -In the Werewolf game, agents assume a variety of roles, each endowed with distinctive abilities and objectives. Below, we will outline the agent classes corresponding to each role: - -- Villager: Ordinary townsfolk trying to survive. -- Werewolf: Predators in disguise, aiming to outlast the villagers. -- Seer: A villager with the power to see the true nature of one player each night. -- Witch: A villager who can save or poison a player each night. - -To implement your own agent, you need to inherit `AgentBase` and implement the `reply` function, which is executed when an agent instance is called via `agent1(x)`. - -```python -from agentscope.agents import AgentBase -from agentscope.message import Msg - - -from typing import Optional, Union, Sequence - -class MyAgent(AgentBase): - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # Do something here - ... - return x -``` - -AgentScope provides several out-of-the-box Agents implements and organizes them as an *Agent Pool*. In this application, we use a built-in agent, `DictDialogAgent`. Here we give an example configuration of `DictDialogAgent` for a player assigned as the role of a werewolf: - -```json -{ - "class": "DictDialogAgent", - "args": { - "name": "Player1", - "sys_prompt": "Act as a player in a werewolf game. You are Player1 and\nthere are totally 6 players, named Player1, Player2, Player3, Player4, Player5 and Player6.\n\nPLAYER ROLES:\nIn werewolf game, players are divided into two werewolves, two villagers, one seer, and one witch. 
Note only werewolves know who are their teammates.\nWerewolves: They know their teammates' identities and attempt to eliminate a villager each night while trying to remain undetected.\nVillagers: They do not know who the werewolves are and must work together during the day to deduce who the werewolves might be and vote to eliminate them.\nSeer: A villager with the ability to learn the true identity of one player each night. This role is crucial for the villagers to gain information.\nWitch: A character who has a one-time ability to save a player from being eliminated at night (sometimes this is a potion of life) and a one-time ability to eliminate a player at night (a potion of death).\n\nGAME RULE:\nThe game consists of two phases: night phase and day phase. The two phases are repeated until werewolf or villager wins the game.\n1. Night Phase: During the night, the werewolves discuss and vote for a player to eliminate. Special roles also perform their actions at this time (e.g., the Seer chooses a player to learn their role, the witch chooses a decide if save the player).\n2. Day Phase: During the day, all surviving players discuss who they suspect might be a werewolf. No one reveals their role unless it serves a strategic purpose. After the discussion, a vote is taken, and the player with the most votes is \"lynched\" or eliminated from the game.\n\nVICTORY CONDITION:\nFor werewolves, they win the game if the number of werewolves is equal to or greater than the number of remaining villagers.\nFor villagers, they win if they identify and eliminate all of the werewolves in the group.\n\nCONSTRAINTS:\n1. Your response should be in the first person.\n2. This is a conversational game. You should respond only based on the conversation history and your strategy.\n\nYou are playing werewolf in this game.\n", - "model_config_name": "gpt-3.5-turbo", - "use_memory": true - } -} -``` - -In this configuration, `Player1` is designated as a `DictDialogAgent`. 
The parameters include a system prompt (`sys_prompt`) that can guide the agent's behavior, a model config name (`model_config_name`) that determines the name of the model configuration, and a flag (`use_memory`) indicating whether the agent should remember past interactions. - -For other players, configurations can be customized based on their roles. Each role may have different prompts, models, or memory settings. You can refer to the JSON file located at `examples/game_werewolf/configs/agent_configs.json` within the AgentScope examples directory. - -### Step 3: Initialize AgentScope and the Agents - -Now we have defined the roles in the application and we can initialize the AgentScope environment and all agents. This process is simplified by AgentScope via a few lines, based on the configuration files we've prepared (assuming there are **2** werewolves, **2** villagers, **1** witch, and **1** seer): - -```python -import agentscope - -# read model and agent configs, and initialize agents automatically -survivors = agentscope.init( - model_configs="./configs/model_configs.json", - agent_configs="./configs/agent_configs.json", - logger_level="DEBUG", -) - -# Define the roles within the game. This list should match the order and number -# of agents specified in the 'agent_configs.json' file. -roles = ["werewolf", "werewolf", "villager", "villager", "seer", "witch"] - -# Based on their roles, assign the initialized agents to variables. -# This helps us reference them easily in the game logic. -wolves, villagers, witch, seer = survivors[:2], survivors[2:-2], survivors[-1], survivors[-2] -``` - -Through this snippet of code, we've allocated roles to our agents and associated them with the configurations that dictate their behavior in the application. - -### Step 4: Set Up the Game Logic - -In this step, you will set up the game logic and orchestrate the flow of the Werewolf game using AgentScope's helper utilities. 
- -#### Parser - -In order to allow `DictDialogAgent` to output fields customized by the users, and to increase the success rate of parsing different fields by LLMs, we have added the `parser` module. Here is the configuration of a parser example: - -``` -to_wolves_vote = "Which player do you vote to kill?" - -wolves_vote_parser = MarkdownJsonDictParser( - content_hint={ - "thought": "what you thought", - "vote": "player_name", - }, - required_keys=["thought", "vote"], - keys_to_memory="vote", - keys_to_content="vote", -) -``` - -For more details about the `parser` module,please see [here](https://modelscope.github.io/agentscope/en/tutorial/203-parser.html). - -#### Leverage Pipeline and MsgHub - -To simplify the construction of agent communication, AgentScope provides two helpful concepts: **Pipeline** and **MsgHub**. - -- **Pipeline**: It allows users to program communication among agents easily. - - ```python - from agentscope.pipelines import SequentialPipeline - - pipe = SequentialPipeline(agent1, agent2, agent3) - x = pipe(x) # the message x will be passed and replied by agent 1,2,3 in order - ``` - -- **MsgHub**: You may have noticed that all the above examples are one-to-one communication. To achieve a group chat, we provide another communication helper utility `msghub`. With it, the messages from participants will be broadcast to all other participants automatically. In such cases, participating agents even don't need input and output messages. All we need to do is to decide the order of speaking. Besides, `msghub` also supports dynamic control of participants. 
- - ```python - with msghub(participants=[agent1, agent2, agent3]) as hub: - agent1() - agent2() - - # Broadcast a message to all participants - hub.broadcast(Msg("Host", "Welcome to join the group chat!")) - - # Add or delete participants dynamically - hub.delete(agent1) - hub.add(agent4) - ``` - -#### Implement Werewolf Pipeline - -The game logic is divided into two major phases: (1) night when werewolves act, and (2) daytime when all players discuss and vote. Each phase will be handled by a section of code using pipelines to manage multi-agent communications. - -- **1.1 Night Phase: Werewolves Discuss and Vote** - -During the night phase, werewolves must discuss among themselves to decide on a target. The `msghub` function creates a message hub for the werewolves to communicate in, where every message sent by an agent is observable by all other agents within the `msghub`. - -```python -# start the game -for i in range(1, MAX_GAME_ROUND + 1): - # Night phase: werewolves discuss - hint = HostMsg(content=Prompts.to_wolves.format(n2s(wolves))) - with msghub(wolves, announcement=hint) as hub: - set_parsers(wolves, Prompts.wolves_discuss_parser) - for _ in range(MAX_WEREWOLF_DISCUSSION_ROUND): - x = sequentialpipeline(wolves) - if x.metadata.get("finish_discussion", False): - break -``` - -After the discussion, werewolves proceed to vote for their target, and the majority's choice is determined. The result of the vote is then broadcast to all werewolves. - -**Note**: the detailed prompts and utility functions can be found in `examples/game_werewolf`. 
- -```python - # werewolves vote - set_parsers(wolves, Prompts.wolves_vote_parser) - hint = HostMsg(content=Prompts.to_wolves_vote) - votes = [extract_name_and_id(wolf(hint).content)[0] for wolf in wolves] - # broadcast the result to werewolves - dead_player = [majority_vote(votes)] - hub.broadcast( - HostMsg(content=Prompts.to_wolves_res.format(dead_player[0])), - ) -``` - -- **1.2 Witch's Turn** - -If the witch is still alive, she gets the opportunity to use her powers to either save the player chosen by the werewolves or use her poison. - -```python - # Witch's turn - healing_used_tonight = False - if witch in survivors: - if healing: - # Witch decides whether to use the healing potion - hint = HostMsg( - content=Prompts.to_witch_resurrect.format_map( - {"witch_name": witch.name, "dead_name": dead_player[0]}, - ), - ) - # Witch decides whether to use the poison - set_parsers(witch, Prompts.witch_resurrect_parser) - if witch(hint).metadata.get("resurrect", False): - healing_used_tonight = True - dead_player.pop() - healing = False -``` - -- **1.3 Seer's Turn** - -The seer has a chance to reveal the true identity of a player. This information can be crucial for the villagers. The `observe()` function allows each agent to take note of a message without immediately replying to it. - -```python - # Seer's turn - if seer in survivors: - # Seer chooses a player to reveal their identity - hint = HostMsg( - content=Prompts.to_seer.format(seer.name, n2s(survivors)), - ) - set_parsers(seer, Prompts.seer_parser) - x = seer(hint) - - player, idx = extract_name_and_id(x.content) - role = "werewolf" if roles[idx] == "werewolf" else "villager" - hint = HostMsg(content=Prompts.to_seer_result.format(player, role)) - seer.observe(hint) -``` - -- **1.4 Update Alive Players** - -Based on the actions taken during the night, the list of surviving players needs to be updated. 
- -```python - # Update the list of survivors and werewolves after the night's events - survivors, wolves = update_alive_players(survivors, wolves, dead_player) -``` - -- **2.1 Daytime Phase: Discussion and Voting** - -During the day, all players will discuss and then vote to eliminate a suspected werewolf. - -```python - # Daytime discussion - with msghub(survivors, announcement=hints) as hub: - # Discuss - set_parsers(survivors, Prompts.survivors_discuss_parser) - x = sequentialpipeline(survivors) - # Vote - set_parsers(survivors, Prompts.survivors_vote_parser) - hint = HostMsg(content=Prompts.to_all_vote.format(n2s(survivors))) - votes = [extract_name_and_id(_(hint).content)[0] for _ in survivors] - vote_res = majority_vote(votes) - # Broadcast the voting result to all players - result = HostMsg(content=Prompts.to_all_res.format(vote_res)) - hub.broadcast(result) - # Update the list of survivors and werewolves after the vote - survivors, wolves = update_alive_players(survivors, wolves, vote_res) -``` - -- **2.2 Check for Winning Conditions** - -After each phase, the game checks if the werewolves or villagers have won. - -```python - # Check if either side has won - if check_winning(survivors, wolves, "Moderator"): - break -``` - -- **2.3 Continue to the Next Round** - -If neither werewolves nor villagers win, the game continues to the next round. - -```python - # If the game hasn't ended, prepare for the next round - hub.broadcast(HostMsg(content=Prompts.to_all_continue)) -``` - -These code blocks outline the core game loop for Werewolf using AgentScope's `msghub` and `pipeline`, which help to easily manage the operational logic of an application. - -### Step 5: Run the Application - -With the game logic and agents set up, you're ready to run the Werewolf game. 
By executing the `pipeline`, the game will proceed through the predefined phases, with agents interacting based on their roles and the strategies coded above: - -```bash -cd examples/game_werewolf -python werewolf.py # Assuming the pipeline is implemented in werewolf.py -``` - -It is recommended that you start the game in [AgentScope Studio](https://modelscope.github.io/agentscope/en/tutorial/209-gui.html), where you -will see the following output in the corresponding link: - -![s](https://img.alicdn.com/imgextra/i3/O1CN01n2Q2tR1aCFD2gpTdu_!!6000000003293-1-tps-960-482.gif) - -[[Return to the top]](#104-usecase-en) diff --git a/docs/sphinx_doc/en/source/tutorial/105-logging.md b/docs/sphinx_doc/en/source/tutorial/105-logging.md deleted file mode 100644 index 4f1f9dd00..000000000 --- a/docs/sphinx_doc/en/source/tutorial/105-logging.md +++ /dev/null @@ -1,65 +0,0 @@ -(105-logging-en)= - -# Logging - -Welcome to the tutorial on logging in multi-agent applications with AgentScope. We'll also touch on how you can visualize these logs using a simple web interface. This guide will help you track the agent's interactions and system information in a clearer and more organized way. - -## Logging - -The logging utilities consist of a custom setup for the `loguru.logger`, which is an enhancement over Python's built-in `logging` module. We provide custom features: - -- **Colored Output**: Assigns different colors to different speakers in a chat to enhance readability. -- **Redirecting Standard Error (stderr)**: Captures error messages and logs them with the `ERROR` level. -- **Custom Log Levels**: Adds a custom level called `CHAT` that is specifically designed for logging dialogue interactions. -- **Special Formatting**: Format logs with timestamps, levels, function names, and line numbers. Chat messages are formatted differently to stand out. 
- -### Setting Up the Logger - -We recommend setting up the logger via `agentscope.init`, and you can set the log level: - -```python -import agentscope - -LOG_LEVEL = Literal[ - "CHAT", - "TRACE", - "DEBUG", - "INFO", - "SUCCESS", - "WARNING", - "ERROR", - "CRITICAL", -] - -agentscope.init(..., logger_level="INFO") -``` - -### Logging a Chat Message - -Logging chat messages helps keep a record of the conversation between agents. Here's how you can do it: - -```python -# Log a simple string message. -logger.chat("Hello World!") - -# Log a `msg` representing dialogue with a speaker and content. -logger.chat({"name": "User", "content": "Hello, how are you?"}) -logger.chat({"name": "Agent", "content": "I'm fine, thank you!"}) -``` - -### Logging a System information - -System logs are crucial for tracking the application's state and identifying issues. Here's how to log different levels of system information: - -```python -# Log general information useful for understanding the flow of the application. -logger.info("The dialogue agent has started successfully.") - -# Log a warning message indicating a potential issue that isn't immediately problematic. -logger.warning("The agent is running slower than expected.") - -# Log an error message when something has gone wrong. -logger.error("The agent encountered an unexpected error while processing a request.") -``` - -[[Return to the top]](#105-logging-en) diff --git a/docs/sphinx_doc/en/source/tutorial/201-agent.md b/docs/sphinx_doc/en/source/tutorial/201-agent.md deleted file mode 100644 index 1a90bf589..000000000 --- a/docs/sphinx_doc/en/source/tutorial/201-agent.md +++ /dev/null @@ -1,189 +0,0 @@ -(201-agent-en)= - -# Agent - -This tutorial helps you to understand the `Agent` in more depth and navigate through the process of crafting your own custom agent with AgentScope. We start by introducing the fundamental abstraction called `AgentBase`, which serves as the base class to maintain the general behaviors of all agents. 
Then, we will go through the *AgentPool*, an ensemble of pre-built, specialized agents, each designed with a specific purpose in mind. Finally, we will demonstrate how to customize your own agent, ensuring it fits the needs of your project. - -## Understanding `AgentBase` - -The `AgentBase` class is the architectural cornerstone for all agent constructs within the AgentScope. As the superclass of all custom agents, it provides a comprehensive template consisting of essential attributes and methods that underpin the core functionalities of any conversational agent. - -Each AgentBase derivative is composed of several key characteristics: - -* `memory`: This attribute enables agents to retain and recall past interactions, allowing them to maintain context in ongoing conversations. For more details about `memory`, we defer to [Memory and Message Management](205-memory). - -* `model`: The model is the computational engine of the agent, responsible for making a response given existing memory and input. For more details about `model`, we defer to [Using Different Model Sources with Model API](#203-model). - -* `sys_prompt` & `engine`: The system prompt acts as predefined instructions that guide the agent in its interactions; and the `engine` is used to dynamically generate a suitable prompt. For more details about them, we defer to [Prompt Engine](206-prompt). - -* `to_dist`: Used to create a distributed version of the agent, to support efficient collaboration among multiple agents. Note that `to_dist` is a reserved field and will be automatically added to the initialization function of any subclass of `AgentBase`. For more details about `to_dist`, please refer to [Distribution](208-distribute). 
- -In addition to these attributes, `AgentBase` endows agents with pivotal methods such as `observe` and `reply`: - -* `observe()`: Through this method, an agent can take note of *message* without immediately replying, allowing it to update its memory based on the observed *message*. -* `reply()`: This is the primary method that developers must implement. It defines the agent's behavior in response to an incoming *message*, encapsulating the logic that results in the agent's output. - -Besides, for unified interfaces and type hints, we introduce another base class `Operator`, which indicates performing some operation on input data by the `__call__` function. And we make `AgentBase` a subclass of `Operator`. - -```python -class AgentBase(Operator): - # ... [code omitted for brevity] - - def __init__( - self, - name: str, - sys_prompt: Optional[str] = None, - model_config_name: str = None, - use_memory: bool = True, - ) -> None: - - # ... [code omitted for brevity] - def observe(self, x: Union[Msg, Sequence[Msg]]) -> None: - # An optional method for updating the agent's internal state based on - # messages it has observed. This method can be used to enrich the - # agent's understanding and memory without producing an immediate - # response. - if self.memory: - self.memory.add(x) - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # The core method to be implemented by custom agents. It defines the - # logic for processing an input message and generating a suitable - # response. - raise NotImplementedError( - f"Agent [{type(self).__name__}] is missing the required " - f'"reply" function.', - ) - - # ... [code omitted for brevity] -``` - -## Exploring the AgentPool - -The *AgentPool* within AgentScope is a curated ensemble of ready-to-use, specialized agents. Each of these agents is tailored for a distinct role and comes equipped with default behaviors that address specific tasks. 
The *AgentPool* is designed to expedite the development process by providing various templates of `Agent`. - -Below is a table summarizing the functionality of some of the key agents available in the Agent Pool: - -| Agent Type | Description | Typical Use Cases | -| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------- | -| `AgentBase` | Serves as the superclass for all agents, providing essential attributes and methods. | The foundation for building any custom agent. | -| `DialogAgent` | Manages dialogues by understanding context and generating coherent responses. | Customer service bots, virtual assistants. | -| `DictDialogAgent` | Manages dialogues by understanding context and generating coherent responses, and the responses are in json format. | Customer service bots, virtual assistants. | -| `UserAgent` | Interacts with the user to collect input, generating messages that may include URLs or additional specifics based on required keys. | Collecting user input for agents | -| `ReActAgent` | An agent class that implements the ReAct algorithm. | Solving complex tasks | -| *More to Come* | AgentScope is continuously expanding its pool with more specialized agents for diverse applications. | | - -## Customizing Agents from the AgentPool - -Customizing an agent from AgentPool enables you to tailor its functionality to meet the unique demands of your multi-agent application. You have the flexibility to modify existing agents with minimal effort by **adjusting configurations** and prompts or, for more extensive customization, you can engage in secondary development. - -Below, we provide usages of how to configure various agents from the AgentPool: - -### `DialogAgent` - -* **Reply Method**: The `reply` method is where the main logic for processing input *message* and generating responses. 
- -```python -def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # Additional processing steps can occur here - - # Record the input if needed - if self.memory: - self.memory.add(x) - - # Generate a prompt for the language model using the system prompt and memory - prompt = self.model.format( - Msg("system", self.sys_prompt, role="system"), - self.memory - and self.memory.get_memory() - or x, # type: ignore[arg-type] - ) - - # Invoke the language model with the prepared prompt - response = self.model(prompt).text - - #Format the response and create a message object - msg = Msg(self.name, response, role="assistant") - - # Print/speak the message in this agent's voice - self.speak(msg) - - # Record the message to memory and return it - if self.memory: - self.memory.add(msg) - - return msg -``` - -* **Usages:** To tailor a `DialogAgent` for a customer service bot: - -```python -from agentscope.agents import DialogAgent - -# Configuration for the DialogAgent -dialog_agent_config = { - "name": "ServiceBot", - "model_config_name": "gpt-3.5", # Specify the model used for dialogue generation - "sys_prompt": "Act as AI assistant to interact with the others. Try to " - "reponse on one line.\n", # Custom prompt for the agent - # Other configurations specific to the DialogAgent -} - -# Create and configure the DialogAgent -service_bot = DialogAgent(**dialog_agent_config) -``` - -### `UserAgent` - -* **Reply Method**: This method processes user input by prompting for content and if needed, additional keys and a URL. The gathered data is stored in a *message* object in the agent's memory for logging or later use and returns the message as a response. 
- -```python -def reply( - self, - x: Optional[Union[Msg, Sequence[Msg]]] = None, - required_keys: Optional[Union[list[str], str]] = None, -) -> Msg: - # Check if there is initial data to be added to memory - if self.memory: - self.memory.add(x) - - content = input(f"{self.name}: ") # Prompt the user for input - kwargs = {} - - # Prompt for additional information based on the required keys - if required_keys is not None: - if isinstance(required_keys, str): - required_keys = [required_keys] - for key in required_keys: - kwargs[key] = input(f"{key}: ") - - # Optionally prompt for a URL if required - url = None - if self.require_url: - url = input("URL: ") - - # Create a message object with the collected input and additional details - msg = Msg(self.name, content=content, url=url, **kwargs) - - # Add the message object to memory - if self.memory: - self.memory.add(msg) - return msg -``` - -* **Usages:** To configure a `UserAgent` for collecting user input and URLs (of file, image, video, audio , or website): - -```python -from agentscope.agents import UserAgent - -# Configuration for UserAgent -user_agent_config = { - "name": "User", - "require_url": True, # If true, the agent will require a URL -} - -# Create and configure the UserAgent -user_proxy_agent = UserAgent(**user_agent_config) -``` - -[[Return to the top]](#201-agent-en) diff --git a/docs/sphinx_doc/en/source/tutorial/202-pipeline.md b/docs/sphinx_doc/en/source/tutorial/202-pipeline.md deleted file mode 100644 index 160c7d0fa..000000000 --- a/docs/sphinx_doc/en/source/tutorial/202-pipeline.md +++ /dev/null @@ -1,301 +0,0 @@ -(202-pipeline-en)= - -# Pipeline and MsgHub - -**Pipeline & MsgHub** (message hub) are one or a sequence of steps describing how the structured `Msg` passes between multi-agents, which streamlines the process of collaboration across agents. - -`Pipeline` allows users to program communication among agents easily, and `MsgHub` enables message sharing among agents like a group chat. 
- -## Pipelines - -`Pipeline` in AgentScope serves as conduits through which messages pass among agents. In AgentScope, an `Agent` is a subclass of an `Operator` that performs some operation on input data. Pipelines extend this concept by encapsulating multiple agents, and also act as an `Operator`. - -Here is the base class for all pipeline types: - -```python -class PipelineBase(Operator): - """Base interface of all pipelines.""" - # ... [code omitted for brevity] - @abstractmethod - def __call__(self, x: Optional[dict] = None) -> dict: - """Define the actions taken by this pipeline. - - Args: - x (Optional[`dict`], optional): - Dialog history and some environmental information - - Returns: - `dict`: The pipeline's response to the input. - """ -``` - -### Category - -AgentScope provides two main types of pipelines based on their implementation strategy: - -* **Operator-Type Pipelines** - - * These pipelines are object-oriented and inherit from the `PipelineBase`. They are operators themselves and can be combined with other operators to create complex interaction patterns. - - ```python - # Instantiate and invoke - pipeline = ClsPipeline(agent1, agent2, agent3) - x = pipeline(x) - ``` - -* **Functional Pipelines** - - * Functional pipelines provide similar control flow mechanisms as the class-based pipelines but are implemented as standalone functions. These are useful for scenarios where a class-based setup may not be necessary or preferred. - - ```python - # Just invoke - x = funcpipeline(agent1, agent2, agent3, x) - ``` - -Pipelines are categorized based on their functionality, much like programming language constructs. 
The table below outlines the different pipelines available in AgentScope: - -| Operator-Type Pipeline | Functional Pipeline | Description | -| -------------------- | -------------------- | ------------------------------------------------------------ | -| `SequentialPipeline` | `sequentialpipeline` | Executes a sequence of operators in order, passing the output of one as the input to the next. | -| `IfElsePipeline` | `ifelsepipeline` | Implements conditional logic, executing one operator if a condition is true and another if it is false. | -| `SwitchPipeline` | `switchpipeline` | Facilitates multi-branch selection, executing an operator from a mapped set based on the evaluation of a condition. | -| `ForLoopPipeline` | `forlooppipeline` | Repeatedly executes an operator for a set number of iterations or until a specified break condition is met. | -| `WhileLoopPipeline` | `whilelooppipeline` | Continuously executes an operator as long as a given condition remains true. | -| - | `placeholder` | Acts as a placeholder in branches that do not require any operations in flow control like if-else/switch | - -### Usage - -This section illustrates how pipelines can simplify the implementation of logic in multi-agent applications by comparing the usage of pipelines versus approaches without pipelines. - -**Note:** Please note that in the examples provided below, we use the term `agent` to represent any instance that can act as an `Operator`. This is for ease of understanding and to illustrate how pipelines orchestrate interactions between different operations. You can replace `agent` with any `Operator`, thus allowing for a mix of `agent` and `pipeline` in practice. 
- -#### `SequentialPipeline` - -* Without pipeline: - - ```python - x = agent1(x) - x = agent2(x) - x = agent3(x) - ``` - -* Using pipeline: - - ```python - from agentscope.pipelines import SequentialPipeline - - pipe = SequentialPipeline([agent1, agent2, agent3]) - x = pipe(x) - ``` - -* Using functional pipeline: - - ```python - from agentscope.pipelines import sequentialpipeline - - x = sequentialpipeline([agent1, agent2, agent3], x) - ``` - -#### `IfElsePipeline` - -* Without pipeline: - - ```python - if condition(x): - x = agent1(x) - else: - x = agent2(x) - ``` - -* Using pipeline: - - ```python - from agentscope.pipelines import IfElsePipeline - - pipe = IfElsePipeline(condition, agent1, agent2) - x = pipe(x) - ``` - -* Using functional pipeline: - - ```python - from agentscope.functional import ifelsepipeline - - x = ifelsepipeline(condition, agent1, agent2, x) - ``` - -#### `SwitchPipeline` - -* Without pipeline: - - ```python - switch_result = condition(x) - if switch_result == case1: - x = agent1(x) - elif switch_result == case2: - x = agent2(x) - else: - x = default_agent(x) - ``` - -* Using pipeline: - - ```python - from agentscope.pipelines import SwitchPipeline - - case_operators = {case1: agent1, case2: agent2} - pipe = SwitchPipeline(condition, case_operators, default_agent) - x = pipe(x) - ``` - -* Using functional pipeline: - - ```python - from agentscope.functional import switchpipeline - - case_operators = {case1: agent1, case2: agent2} - x = switchpipeline(condition, case_operators, default_agent, x) - ``` - -#### `ForLoopPipeline` - -* Without pipeline: - - ```python - for i in range(max_iterations): - x = agent(x) - if break_condition(x): - break - ``` - -* Using pipeline: - - ```python - from agentscope.pipelines import ForLoopPipeline - - pipe = ForLoopPipeline(agent, max_iterations, break_condition) - x = pipe(x) - ``` - -* Using functional pipeline: - - ```python - from agentscope.functional import forlooppipeline - - x = 
forlooppipeline(agent, max_iterations, break_condition, x) - ``` - -#### `WhileLoopPipeline` - -* Without pipeline: - - ```python - while condition(x): - x = agent(x) - ``` - -* Using pipeline: - - ```python - from agentscope.pipelines import WhileLoopPipeline - - pipe = WhileLoopPipeline(agent, condition) - x = pipe(x) - ``` - -* Using functional pipeline: - - ```python - from agentscope.functional import whilelooppipeline - - x = whilelooppipeline(agent, condition, x) - ``` - -### Pipeline Combination - -It's worth noting that AgentScope supports the combination of pipelines to create complex interactions. For example, we can create a pipeline that executes a sequence of agents in order, and then executes another pipeline that executes a sequence of agents in condition. - -```python -from agentscope.pipelines import SequentialPipeline, IfElsePipeline -# Create a pipeline that executes agents in order -pipe1 = SequentialPipeline([agent1, agent2, agent3]) -# Create a pipeline that executes agents in ifElsePipeline -pipe2 = IfElsePipeline(condition, agent4, agent5) -# Create a pipeline that executes pipe1 and pipe2 in order -pipe3 = SequentialPipeline([pipe1, pipe2]) -# Invoke the pipeline -x = pipe3(x) -``` - -## MsgHub - -`MsgHub` is designed to manage dialogue among a group of agents, allowing for the sharing of messages. Through `MsgHub`, agents can broadcast messages to all other agents in the group with `broadcast`. - -Here is the core class for a `MsgHub`: - -```python -class MsgHubManager: - """MsgHub manager class for sharing dialog among a group of agents.""" - # ... [code omitted for brevity] - - def broadcast(self, msg: Union[dict, list[dict]]) -> None: - """Broadcast the message to all participants.""" - for agent in self.participants: - agent.observe(msg) - - def add(self, new_participant: Union[Sequence[AgentBase], AgentBase]) -> None: - """Add new participant into this hub""" - # ... 
[code omitted for brevity] - - def delete(self, participant: Union[Sequence[AgentBase], AgentBase]) -> None: - """Delete agents from participant.""" - # ... [code omitted for brevity] -``` - -### Usage - -#### Creating a MsgHub - -To create a `MsgHub`, instantiate a `MsgHubManager` by calling the `msghub` helper function with a list of participating agents. Additionally, you can supply an optional initial announcement that, if provided, will be broadcast to all participants upon initialization. - -```python -from agentscope.msg_hub import msghub - -# Initialize MsgHub with participating agents -hub_manager = msghub( - participants=[agent1, agent2, agent3], announcement=initial_announcement -) -``` - -#### Broadcast message in MsgHub - -The `MsgHubManager` can be used with a context manager to handle the setup and teardown of the message hub environment: - -```python -with msghub( - participants=[agent1, agent2, agent3], announcement=initial_announcement -) as hub: - # Agents can now broadcast and receive messages within this block - agent1() - agent2() - - # Or manually broadcast a message - hub.broadcast(some_message) - -``` - -Upon exiting the context block, the `MsgHubManager` ensures that each agent's audience is cleared, preventing any unintended message sharing outside of the hub context. - -#### Adding and Deleting Participants - -You can dynamically add or remove agents from the `MsgHub`: - -```python -# Add a new participant -hub.add(new_agent) - -# Remove an existing participant -hub.delete(existing_agent) -``` - -[[Return to the top]](#202-pipeline-en) diff --git a/docs/sphinx_doc/en/source/tutorial/203-model.md b/docs/sphinx_doc/en/source/tutorial/203-model.md deleted file mode 100644 index 06316b241..000000000 --- a/docs/sphinx_doc/en/source/tutorial/203-model.md +++ /dev/null @@ -1,635 +0,0 @@ -(203-model-en)= - -# Model - -In AgentScope, the model deployment and invocation are decoupled by `ModelWrapper`. 
-Developers can specify their own model by providing model configurations, -and AgentScope also provides scripts to support developers to customize -model services. - -## Supported Models - -Currently, AgentScope supports the following model service APIs: - -- OpenAI API, including chat, image generation (DALL-E), and embedding. -- DashScope API, including chat, image synthesis and text embedding. -- Gemini API, including chat and embedding. -- ZhipuAI API, including chat and embedding. -- Ollama API, including chat, embedding and generation. -- LiteLLM API, including chat, with various model APIs. -- Post Request API, model inference services based on Post - requests, including Huggingface/ModelScope Inference API and various - post request based model APIs. -- Anthropic Chat API. - -## Configuration - -In AgentScope, users specify the model configuration through the -`model_configs` parameter in the `agentscope.init` interface. -`model_configs` can be a **dictionary**, **a list of dictionaries**, or a -**path** to model configuration file. - -```python -import agentscope - -agentscope.init(model_configs=MODEL_CONFIG_OR_PATH) -``` - -### Configuration Format - -In AgentScope, the model configuration is a dictionary used to specify the type of model and set the call parameters. -We divide the fields in the model configuration into two categories: _basic parameters_ and _detailed parameters_. - -Among them, the basic parameters include `config_name` and `model_type`, which are used to distinguish different model configurations and specific `ModelWrapper` types. -The detailed parameters will be fed into the corresponding model class's constructor to initialize the model instance. - -```python -{ - # Basic parameters - "config_name": "gpt-4-temperature-0.0", # Model configuration name - "model_type": "openai_chat", # Corresponds to the `ModelWrapper` type - - # Detailed parameters - # ...
-} -``` - -#### Basic Parameters - -In basic parameters, `config_name` is the identifier of the model configuration, -which we will use to specify the model service when initializing an agent. - -`model_type` corresponds to the type of `ModelWrapper` and is used to specify the type of model service. -It corresponds to the `model_type` field in the `ModelWrapper` class in the source code. - -```python -class OpenAIChatWrapper(OpenAIWrapperBase): - """The model wrapper for OpenAI's chat API.""" - - model_type: str = "openai_chat" - # ... -``` - -In the current AgentScope, the supported `model_type` types, the corresponding -`ModelWrapper` classes, and the supported APIs are as follows: - -| API | Task | Model Wrapper | `model_type` | Some Supported Models | -|------------------------|-----------------|---------------------------------------------------------------------------------------------------------------------------------|-------------------------------|--------------------------------------------------| -| OpenAI API | Chat | [`OpenAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_chat"` | gpt-4, gpt-3.5-turbo, ... | -| | Embedding | [`OpenAIEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_embedding"` | text-embedding-ada-002, ... | -| | DALL·E | [`OpenAIDALLEWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_dall_e"` | dall-e-2, dall-e-3 | -| DashScope API | Chat | [`DashScopeChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_chat"` | qwen-plus, qwen-max, ... 
| -| | Image Synthesis | [`DashScopeImageSynthesisWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_image_synthesis"` | wanx-v1 | -| | Text Embedding | [`DashScopeTextEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_text_embedding"` | text-embedding-v1, text-embedding-v2, ... | -| | Multimodal | [`DashScopeMultiModalWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_multimodal"` | qwen-vl-plus, qwen-vl-max, qwen-audio-turbo, ... | -| Gemini API | Chat | [`GeminiChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/gemini_model.py) | `"gemini_chat"` | gemini-pro, ... | -| | Embedding | [`GeminiEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/gemini_model.py) | `"gemini_embedding"` | models/embedding-001, ... | -| ZhipuAI API | Chat | [`ZhipuAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/zhipu_model.py) | `"zhipuai_chat"` | glm4, ... | -| | Embedding | [`ZhipuAIEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/zhipu_model.py) | `"zhipuai_embedding"` | embedding-2, ... | -| ollama | Chat | [`OllamaChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_chat"` | llama2, ... | -| | Embedding | [`OllamaEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_embedding"` | llama2, ... | -| | Generation | [`OllamaGenerationWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_generate"` | llama2, ... 
| -| LiteLLM API | Chat | [`LiteLLMChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/litellm_model.py) | `"litellm_chat"` | - | -| Yi API | Chat | [`YiChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/yi_model.py) | `"yi_chat"` | yi-large, yi-medium, ... | -| Post Request based API | - | [`PostAPIModelWrapperBase`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | - | - | -| | Chat | [`PostAPIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | `"post_api_chat"` | meta-llama/Meta-Llama-3-8B-Instruct, ... | -| | Image Synthesis | [`PostAPIDALLEWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | `"post_api_dall_e"` | - | -| | Embedding | [`PostAPIEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | `"post_api_embedding"` | - | -| Anthropic API | Chat | [`AnthropicChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/anthropic_model.py) | `"anthropic_chat"` | claude-3-5-sonnet-20241022, ... | - -#### Detailed Parameters - -In AgentScope, the detailed parameters differ between `ModelWrapper` classes. -To specify the detailed parameters, you need to refer to the specific `ModelWrapper` class and its constructor. -Here we provide example configurations for different model wrappers. - -##### OpenAI API - -
-OpenAI Chat API (agentscope.models.OpenAIChatWrapper) - -```python -{ - "config_name": "{your_config_name}", - "model_type": "openai_chat", - - # Required parameters - "model_name": "gpt-4", - - # Optional parameters - "api_key": "{your_api_key}", # OpenAI API Key, if not provided, it will be read from the environment variable - "organization": "{your_organization}", # Organization name, if not provided, it will be read from the environment variable - "client_args": { # Parameters for initializing the OpenAI API Client - # e.g. "max_retries": 3, - }, - "generate_args": { # Parameters passed to the model when calling - # e.g. "temperature": 0.0 - }, -} -``` - -
- -
-OpenAI DALL·E API (agentscope.models.OpenAIDALLEWrapper) - -```python -{ - "config_name": "{your_config_name}", - "model_type": "openai_dall_e", - - # Required parameters - "model_name": "{model_name}", # OpenAI model name, e.g. dall-e-2, dall-e-3 - - # Optional parameters - "api_key": "{your_api_key}", # OpenAI API Key, if not provided, it will be read from the environment variable - "organization": "{your_organization}", # Organization name, if not provided, it will be read from the environment variable - "client_args": { # Parameters for initializing the OpenAI API Client - # e.g. "max_retries": 3, - }, - "generate_args": { # Parameters passed to the model when calling - # e.g. "n": 1, "size": "512x512" - } -} -``` - -
- -
-OpenAI Embedding API (agentscope.models.OpenAIEmbeddingWrapper) - -```python -{ - "config_name": "{your_config_name}", - "model_type": "openai_embedding", - - # Required parameters - "model_name": "{model_name}", # OpenAI model name, e.g. text-embedding-ada-002, text-embedding-3-small - - # Optional parameters - "api_key": "{your_api_key}", # OpenAI API Key, if not provided, it will be read from the environment variable - "organization": "{your_organization}", # Organization name, if not provided, it will be read from the environment variable - "client_args": { # Parameters for initializing the OpenAI API Client - # e.g. "max_retries": 3, - }, - "generate_args": { # Parameters passed to the model when calling - # e.g. "encoding_format": "float" - } -} -``` - -
- -
- -#### DashScope API - -
-DashScope Chat API (agentscope.models.DashScopeChatWrapper) - -```python -{ - "config_name": "my_dashscope_chat_config", - "model_type": "dashscope_chat", - - # Required parameters - "model_name": "{model_name}", # The model name in DashScope API, e.g. qwen-max - - # Optional parameters - "api_key": "{your_api_key}", # DashScope API Key, if not provided, it will be read from the environment variable - "generate_args": { - # e.g. "temperature": 0.5 - }, -} -``` - -
- -
-DashScope Image Synthesis API (agentscope.models.DashScopeImageSynthesisWrapper) - -```python -{ - "config_name": "my_dashscope_image_synthesis_config", - "model_type": "dashscope_image_synthesis", - - # Required parameters - "model_name": "{model_name}", # The model name in DashScope Image Synthesis API, e.g. wanx-v1 - - # Optional parameters - "api_key": "{your_api_key}", - "generate_args": { - "negative_prompt": "xxx", - "n": 1, - # ... - } -} -``` - -
- -
-DashScope Text Embedding API (agentscope.models.DashScopeTextEmbeddingWrapper) - -```python -{ - "config_name": "my_dashscope_text_embedding_config", - "model_type": "dashscope_text_embedding", - - # Required parameters - "model_name": "{model_name}", # The model name in DashScope Text Embedding API, e.g. text-embedding-v1 - - # Optional parameters - "api_key": "{your_api_key}", - "generate_args": { - # ... - }, -} -``` - -
- -
-DashScope Multimodal Conversation API (agentscope.models.DashScopeMultiModalWrapper) - -```python -{ - "config_name": "my_dashscope_multimodal_config", - "model_type": "dashscope_multimodal", - - # Required parameters - "model_name": "{model_name}", # The model name in DashScope Multimodal Conversation API, e.g. qwen-vl-plus - - # Optional parameters - "api_key": "{your_api_key}", - "generate_args": { - # ... - }, -} -``` - -
- -
- -#### Gemini API - -
-Gemini Chat API (agentscope.models.GeminiChatWrapper) - -```python -{ - "config_name": "my_gemini_chat_config", - "model_type": "gemini_chat", - - # Required parameters - "model_name": "{model_name}", # The model name in Gemini API, e.g. gemini-pro - - # Optional parameters - "api_key": "{your_api_key}", # If not provided, the API key will be read from the environment variable GEMINI_API_KEY -} -``` - -
- -
-Gemini Embedding API (agentscope.models.GeminiEmbeddingWrapper) - -```python -{ - "config_name": "my_gemini_embedding_config", - "model_type": "gemini_embedding", - - # Required parameters - "model_name": "{model_name}", # The model name in Gemini API, e.g. models/embedding-001 - - # Optional parameters - "api_key": "{your_api_key}", # If not provided, the API key will be read from the environment variable GEMINI_API_KEY -} -``` - -
- -
- - -#### ZhipuAI API - -
-ZhipuAI Chat API (agentscope.models.ZhipuAIChatWrapper) - -```python -{ - "config_name": "my_zhipuai_chat_config", - "model_type": "zhipuai_chat", - - # Required parameters - "model_name": "{model_name}", # The model name in ZhipuAI API, e.g. glm-4 - - # Optional parameters - "api_key": "{your_api_key}" -} -``` - -
- -
-ZhipuAI Embedding API (agentscope.models.ZhipuAIEmbeddingWrapper) - -```python -{ - "config_name": "my_zhipuai_embedding_config", - "model_type": "zhipuai_embedding", - - # Required parameters - "model_name": "{model_name}", # The model name in ZhipuAI API, e.g. embedding-2 - - # Optional parameters - "api_key": "{your_api_key}", -} -``` - -
- -
- - -#### Ollama API - -
-Ollama Chat API (agentscope.models.OllamaChatWrapper) - -```python -{ - "config_name": "my_ollama_chat_config", - "model_type": "ollama_chat", - - # Required parameters - "model_name": "{model_name}", # The model name used in ollama API, e.g. llama2 - - # Optional parameters - "options": { # Parameters passed to the model when calling - # e.g. "temperature": 0., "seed": 123, - }, - "keep_alive": "5m", # Controls how long the model will stay loaded into memory -} -``` - -
- -
-Ollama Generation API (agentscope.models.OllamaGenerationWrapper) - -```python -{ - "config_name": "my_ollama_generate_config", - "model_type": "ollama_generate", - - # Required parameters - "model_name": "{model_name}", # The model name used in ollama API, e.g. llama2 - - # Optional parameters - "options": { # Parameters passed to the model when calling - # "temperature": 0., "seed": 123, - }, - "keep_alive": "5m", # Controls how long the model will stay loaded into memory -} -``` - -
- -
-Ollama Embedding API (agentscope.models.OllamaEmbeddingWrapper) - -```python -{ - "config_name": "my_ollama_embedding_config", - "model_type": "ollama_embedding", - - # Required parameters - "model_name": "{model_name}", # The model name used in ollama API, e.g. llama2 - - # Optional parameters - "options": { # Parameters passed to the model when calling - # "temperature": 0., "seed": 123, - }, - "keep_alive": "5m", # Controls how long the model will stay loaded into memory -} -``` - -
- -
- -#### LiteLLM Chat API - -
-LiteLLM Chat API (agentscope.models.LiteLLMChatWrapper) - -```python -{ - "config_name": "lite_llm_openai_chat_gpt-3.5-turbo", - "model_type": "litellm_chat", - "model_name": "gpt-3.5-turbo" # Note that each model requires its own environment variables to be set, such as OPENAI_API_KEY. Refer to https://docs.litellm.ai/docs/ for details. -}, -``` - -
- -
- -#### Post Request API - -
-Post Request Chat API (agentscope.models.PostAPIChatWrapper) - -```python -{ - "config_name": "my_postapichatwrapper_config", - "model_type": "post_api_chat", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` -> ⚠️ The Post Request Chat model wrapper (`PostAPIChatWrapper`) has the following properties: -> 1) The `.format()` function makes sure the input messages become a list of dicts. -> 2) The `._parse_response()` function assumes the generated text will be in `response["data"]["response"]["choices"][0]["message"]["content"]` - -
- - - -
-Post Request Image Synthesis API (agentscope.models.PostAPIDALLEWrapper) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_dall_e", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` -> ⚠️ The Post Request Image Synthesis model wrapper (`PostAPIDALLEWrapper`) has the following properties: -> 1) The `._parse_response()` function assumes the generated image will be presented as urls in `response["data"]["response"]["data"][i]["url"]` - -
- - -
-Post Request Embedding API (agentscope.models.PostAPIEmbeddingWrapper) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_embedding", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` -> ⚠️ The Post Request Embedding model wrapper (`PostAPIEmbeddingWrapper`) has the following properties: -> 1) The `._parse_response()` function assumes the generated embeddings will be in `response["data"]["response"]["data"][i]["embedding"]` - -
- - -
-Post Request API (agentscope.models.PostAPIModelWrapperBase) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_chat", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` -> ⚠️ Post Request model wrapper (`PostAPIModelWrapperBase`) returns raw HTTP responses from the API in ModelResponse, and the `.format()` is not implemented. It is recommended to use `Post Request Chat API` when running examples with chats. -> `PostAPIModelWrapperBase` can be used when -> 1) only the raw HTTP response is wanted and `.format()` is not called; -> 2) Or, the developers want to overwrite the `.format()` and/or `._parse_response()` functions. - -
- -
- -#### Anthropic API - -
- -Anthropic Chat API (agentscope.models.AnthropicChatWrapper) - - -```python -{ - "config_name": "my_anthropic_chat_config", - "model_type": "anthropic_chat", - "model_name": "claude-3-5-sonnet-20241022", - - # Required parameters - "api_key": "{your_api_key}", - - # Optional parameters - "temperature": 0.5 -} -``` -
- - -
- -## Build Model Service from Scratch - -For developers who need to build their own model services, AgentScope -provides some scripts to help developers quickly build model services. -You can find these scripts and instructions in the [scripts](https://github.com/modelscope/agentscope/tree/main/scripts) -directory. - -Specifically, AgentScope provides the following model service scripts: - -- [CPU inference engine **ollama**](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#ollama) -- [Model service based on **Flask + Transformers**](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#with-transformers-library) -- [Model service based on **Flask + ModelScope**](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#with-modelscope-library) -- [**FastChat** inference engine](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#fastchat) -- [**vllm** inference engine](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#vllm) - -For instructions on how to quickly start these model services, refer to the [README.md](https://github.com/modelscope/agentscope/blob/main/scripts/README.md) file under the [scripts](https://github.com/modelscope/agentscope/blob/main/scripts/) directory. - -## Create Your Own Model Wrapper - -AgentScope allows developers to customize their own model wrappers. -The new model wrapper class should - -- inherit from `ModelWrapperBase` class, -- provide a `model_type` field to identify this model wrapper in the model configuration, -- implement its `__init__` and `__call__` functions, and -- be registered by calling the `agentscope.register_model_wrapper_class` function. - -The following is an example for creating a new model wrapper class.
- -```python -from agentscope.models import ModelWrapperBase - -class MyModelWrapper(ModelWrapperBase): - - model_type: str = "my_model" - - def __init__(self, config_name, my_arg1, my_arg2, **kwargs): - # Initialize the model instance - super().__init__(config_name=config_name) - # ... - - def __call__(self, input, **kwargs) -> str: - # Call the model instance - # ... -``` - -Then we register the new model wrapper class and use it in the model configuration. - -```python -import agentscope - -agentscope.register_model_wrapper_class(MyModelWrapper) - -my_model_config = { - # Basic parameters - "config_name": "my_model_config", - "model_type": "my_model", - - # Detailed parameters - "my_arg1": "xxx", - "my_arg2": "yyy", - # ... -} -``` - -[[Return to Top]](#203-model-en) diff --git a/docs/sphinx_doc/en/source/tutorial/203-parser.md b/docs/sphinx_doc/en/source/tutorial/203-parser.md deleted file mode 100644 index 5bbf46dd7..000000000 --- a/docs/sphinx_doc/en/source/tutorial/203-parser.md +++ /dev/null @@ -1,530 +0,0 @@ -(203-parser-en)= - -# Response Parser - -## Table of Contents - -- [Background](#background) -- [Parser Module](#parser-module) - - [Overview](#overview) - - [String Type](#string-type) - - [MarkdownCodeBlockParser](#markdowncodeblockparser) - - [Initialization](#initialization) - - [Format Instruction Template](#format-instruction-template) - - [Parse Function](#parse-function) - - [Dictionary Type](#dictionary-type) - - [MarkdownJsonDictParser](#markdownjsondictparser) - - [Initialization & Format Instruction Template](#initialization--format-instruction-template) - - [Validation](#validation) - - [MultiTaggedContentParser](#multitaggedcontentparser) - - [Initialization & Format Instruction Template](#initialization--format-instruction-template-1) - - [Parse Function](#parse-function-1) - - [JSON / Python Object Type](#json--python-object-type) - - [MarkdownJsonObjectParser](#markdownjsonobjectparser) - - [Initialization & Format Instruction 
Template](#initialization--format-instruction-template-2) - - [Parse Function](#parse-function-2) -- [Typical Use Cases](#typical-use-cases) - - [WereWolf Game](#werewolf-game) - - [ReAct Agent and Tool Usage](#react-agent-and-tool-usage) -- [Customized Parser](#customized-parser) - -## Background - -In the process of building LLM-empowered application, parsing the LLM generated string into a specific format and extracting the required information is a very important step. -However, due to the following reasons, this process is also a very complex process: - -1. **Diversity**: The target format of parsing is diverse, and the information to be extracted may be a specific text, a JSON object, or a complex data structure. -2. **Complexity**: The result parsing is not only to convert the text generated by LLM into the target format, but also involves a series of issues such as prompt engineering (reminding LLM what format of output should be generated), error handling, etc. -3. **Flexibility**: Even in the same application, different stages may also require the agent to generate output in different formats. - -For the convenience of developers, AgentScope provides a parser module to help developers parse LLM response into a specific format. By using the parser module, developers can easily parse the response into the target format by simple configuration, and switch the target format flexibly. - -In AgentScope, the parser module features -1. **Flexibility**: Developers can flexibly set the required format, flexibly switch the parser without modifying the code of agent class. That is, the specific "target format" and the agent's `reply` function are decoupled. -2. **Freedom**: The format instruction, result parsing and prompt engineering are all explicitly finished in the `reply` function. Developers and users can freely choose to use the parser or parse LLM response by their own code. -3. 
**Transparency**: When using the parser, the process and results of prompt construction are completely visible and transparent to developers in the `reply` function, and developers can precisely debug their applications. - -## Parser Module - -### Overview - -The main functions of the parser module include: - -1. Provide "format instruction", that is, remind LLM where to generate what output, for example - -```` -You should generate python code in a fenced code block as follows -```python -{your_python_code} -``` -```` - -2. Provide a parse function, which directly parses the text generated by LLM into the target data format, - -3. Post-processing for dictionary format. After parsing the text into a dictionary, different fields may have different uses. - -AgentScope provides multiple built-in parsers, and developers can choose according to their needs. - -| Target Format | Parser Class | Description | -|---------------------------|----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| String | `MarkdownCodeBlockParser` | Requires LLM to generate specified text within a Markdown code block marked by ```. The result is a string. | -| Dictionary | `MarkdownJsonDictParser` | Requires LLM to produce a specified dictionary within the code block marked by \```json and \```. The result is a Python dictionary. | -| | `MultiTaggedContentParser` | Requires LLM to generate specified content within multiple tags. Contents from different tags will be parsed into a single Python dictionary with different key-value pairs. | -| | `RegexTaggedContentParser` | For uncertain tag names and quantities, allows users to modify regular expressions, and the return result is a dictionary. | -| JSON / Python Object Type | `MarkdownJsonObjectParser` | Requires LLM to produce specified content within the code block marked by \```json and \```. 
The result will be converted into a Python object via json.loads. | - - -> **NOTE**: Compared to `MarkdownJsonDictParser`, `MultiTaggedContentParser` is more suitable for weak LLMs and when the required format is too complex. -> For example, when LLM is required to generate Python code, if the code is returned directly within a dictionary, LLM needs to be aware of escaping characters (\t, \n, ...), and the differences between double and single quotes when calling `json.loads` -> -> In contrast, `MultiTaggedContentParser` guides LLM to generate each key-value pair separately in individual tags and then combines them into a dictionary, thus reducing the difficulty. - - ->**NOTE**: The built-in strategies to construct format instruction just provide some examples. In AgentScope, developer has complete control over prompt construction. So they can choose not to use the format instruction provided by parsers, customizing their format instruction by hand or implementing new parser class are all feasible. - -In the following sections, we will introduce the usage of these parsers based on different target formats. - -### String Type - -#### MarkdownCodeBlockParser - -##### Initialization - -- `MarkdownCodeBlockParser` requires LLM to generate specific text within a specified code block in Markdown format. Different languages can be specified with the `language_name` parameter to utilize the large model's ability to produce corresponding outputs. For example, when asking the large model to produce Python code, initialize as follows: - - ```python - from agentscope.parsers import MarkdownCodeBlockParser - - parser = MarkdownCodeBlockParser(language_name="python", content_hint="your python code") - ``` - -##### Format Instruction Template - -- `MarkdownCodeBlockParser` provides the following format instruction template. 
When the user accesses the `format_instruction` attribute, `{language_name}` will be replaced with the string entered at initialization: - - ```` - You should generate {language_name} code in a {language_name} fenced code block as follows: - ```{language_name} - {content_hint} - ``` - ```` - -- For the above initialization with `language_name` as `"python"`, when the `format_instruction` attribute is accessed, the following string will be returned: - - ```python - print(parser.format_instruction) - ``` - - ```` - You should generate python code in a python fenced code block as follows: - ```python - your python code - ``` - ```` - -##### Parse Function - -- `MarkdownCodeBlockParser` provides a `parse` method to parse the text generated by LLM. Its input and output are both `ModelResponse` objects, and the parsing result will be mounted on the `parsed` attribute of the output object. - - ````python - res = parser.parse( - ModelResponse( - text="""The following is generated python code - ```python - print("Hello world!") - ``` - """ - ) - ) - - print(res.parsed) - ```` - - ``` - print("Hello world!") - ``` - -### Dictionary Type - -Unlike strings and general JSON/Python objects, the dictionary is a particularly powerful format in LLM applications, so AgentScope provides additional post-processing functions for the dictionary type. -When initializing the parser, you can set the `keys_to_content`, `keys_to_memory`, and `keys_to_metadata` parameters to achieve filtering of key-value pairs when calling the parser's `to_content`, `to_memory`, and `to_metadata` methods. - -- `keys_to_content` specifies the key-value pairs that will be placed in the `content` field of the returned `Msg` object. The content field will be returned to other agents, participate in their prompt construction, and will also be called by the `self.speak` function for display. -- `keys_to_memory` specifies the key-value pairs that will be stored in the memory of the agent.
-- `keys_to_metadata` specifies the key-value pairs that will be placed in the `metadata` field of the returned `Msg` object, which can be used for application control flow judgment, or mount some information that does not need to be returned to other agents. - -The three parameters receive bool values, string and a list of strings. The meaning of their values is as follows: -- `False`: The corresponding filter function will return `None`. -- `True`: The whole dictionary will be returned. -- `str`: The corresponding value will be directly returned. -- `List[str]`: A filtered dictionary will be returned according to the list of keys. - -By default, `keys_to_content` and `keys_to_memory` are `True`, that is, the whole dictionary will be returned. `keys_to_metadata` defaults to `False`, that is, the corresponding filter function will return `None`. - -For example, the dictionary generated by the werewolf in the daytime discussion in a werewolf game. In this example, -- `"thought"` should not be returned to other agents, but should be stored in the agent's memory to ensure the continuity of the werewolf strategy; -- `"speak"` should be returned to other agents and stored in the agent's memory; -- `"finish_discussion"` is used in the application's control flow to determine whether the discussion has ended. To save tokens, this field should not be returned to other agents or stored in the agent's memory. - - ```python - { - "thought": "The others didn't realize I was a werewolf. I should end the discussion soon.", - "speak": "I agree with you.", - "finish_discussion": True - } - ``` - -In AgentScope, we achieve post-processing by calling the `to_content`, `to_memory`, and `to_metadata` methods, as shown in the following code: - -- The code for the application's control flow, create the corresponding parser object and load it - - ```python - from agentscope.parsers import MarkdownJsonDictParser - - # ... - - agent = DictDialogAgent(...) 
- - # Take MarkdownJsonDictParser as example - parser = MarkdownJsonDictParser( - content_hint={ - "thought": "what you thought", - "speak": "what you speak", - "finish_discussion": "whether the discussion is finished" - }, - keys_to_content="speak", - keys_to_memory=["thought", "speak"], - keys_to_metadata=["finish_discussion"] - ) - - # Load parser, which is equivalent to specifying the required format - agent.set_parser(parser) - - # The discussion process - while True: - # ... - x = agent(x) - # Break the loop according to the finish_discussion field in metadata - if x.metadata["finish_discussion"]: - break - ``` - -- Filter the dictionary in the agent's `reply` function - - ```python - # ... - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - - # ... - res = self.model(prompt, parse_func=self.parser.parse) - - # Story the thought and speak fields into memory - self.memory.add( - Msg( - self.name, - content=self.parser.to_memory(res.parsed), - role="assistant", - ) - ) - - # Store in content and metadata fields in the returned Msg object - msg = Msg( - self.name, - content=self.parser.to_content(res.parsed), - role="assistant", - metadata=self.parser.to_metadata(res.parsed), - ) - self.speak(msg) - - return msg - ``` - -> **Note**: `keys_to_content`, `keys_to_memory`, and `keys_to_metadata` parameters can be a string, a list of strings, or a bool value. -> - For `True`, the `to_content`, `to_memory`, and `to_metadata` methods will directly return the whole dictionary. -> - For `False`, the `to_content`, `to_memory`, and `to_metadata` methods will directly return `None`. -> - For a string, the `to_content`, `to_memory`, and `to_metadata` methods will directly extract the corresponding value. For example, if `keys_to_content="speak"`, the `to_content` method will put `res.parsed["speak"]` into the `content` field of the `Msg` object, and the `content` field will be a string rather than a dictionary. 
-> - For a list of string, the `to_content`, `to_memory`, and `to_metadata` methods will filter the dictionary according to the list of keys. -> ```python -> parser = MarkdownJsonDictParser( -> content_hint={ -> "thought": "what you thought", -> "speak": "what you speak", -> }, -> keys_to_content="speak", -> keys_to_memory=["thought", "speak"], -> ) -> -> example_dict = {"thought": "abc", "speak": "def"} -> print(parser.to_content(example_dict)) # def -> print(parser.to_memory(example_dict)) # {"thought": "abc", "speak": "def"} -> print(parser.to_metadata(example_dict)) # None -> ``` -> ``` -> def -> {"thought": "abc", "speak": "def"} -> None -> ``` - -#### Parsers - -For dictionary type return values, AgentScope provides multiple parsers for developers to choose from according to their needs. - -##### RegexTaggedContentParser - -###### Initialization - -`RegexTaggedContentParser` is designed for scenarios where 1) the tag name is uncertain, and 2) the number of tags is uncertain. -In this case, the parser cannot provide a general response format instruction, so developers need to provide the corresponding response format instruction (`format_instruction`) when initializing. -Of course, the developers can handle the prompt engineering by themselves optionally. - -```python -from agentscope.parsers import RegexTaggedContentParser - -parser = RegexTaggedContentParser( - format_instruction="""Respond with specific tags as outlined below -what you thought -what you speak -""", - try_parse_json=True, # Try to parse the content of the tag as JSON object - required_keys=["thought", "speak"] # Required keys in the returned dictionary -) -``` - -##### MarkdownJsonDictParser - -###### Initialization & Format Instruction Template - -- `MarkdownJsonDictParser` requires LLM to generate dictionary within a code block fenced by \```json and \``` tags. 
- -- In addition to `keys_to_content`, `keys_to_memory` and `keys_to_metadata`, the `content_hint` parameter can be provided to give an example and explanation of the response result, that is, to remind LLM where and what kind of dictionary should be generated. -This parameter can be a string or a dictionary. A dictionary will be automatically converted to a string when constructing the format instruction. - - ```python - from agentscope.parsers import MarkdownJsonDictParser - - # dictionary as content_hint - MarkdownJsonDictParser( - content_hint={ - "thought": "what you thought", - "speak": "what you speak", - } - ) - # or string as content_hint - MarkdownJsonDictParser( - content_hint="""{ - "thought": "what you thought", - "speak": "what you speak", - }""" - ) - ``` - - - The corresponding `format_instruction` attribute - - ```` - You should respond a json object in a json fenced code block as follows: - ```json - {content_hint} - ``` - ```` - -###### Validation - -The `content_hint` parameter in `MarkdownJsonDictParser` also supports type validation based on Pydantic. When initializing, you can set `content_hint` to a Pydantic model class, and AgentScope will modify the `format_instruction` attribute based on this class. Besides, Pydantic will be used to validate the dictionary returned by LLM during parsing. - -A simple example is as follows, where `"..."` can be filled with specific type validation rules; for these rules, please refer to the [Pydantic](https://docs.pydantic.dev/latest/) documentation. 
- - ```python - from pydantic import BaseModel, Field - from agentscope.parsers import MarkdownJsonDictParser - - class Schema(BaseModel): - thought: str = Field(..., description="what you thought") - speak: str = Field(..., description="what you speak") - end_discussion: bool = Field(..., description="whether the discussion is finished") - - parser = MarkdownJsonDictParser(content_hint=Schema) - ``` - -- The corresponding `format_instruction` attribute - -```` -Respond a JSON dictionary in a markdown's fenced code block as follows: -```json -{a_JSON_dictionary} -``` -The generated JSON dictionary MUST follow this schema: -{'properties': {'speak': {'description': 'what you speak', 'title': 'Speak', 'type': 'string'}, 'thought': {'description': 'what you thought', 'title': 'Thought', 'type': 'string'}, 'end_discussion': {'description': 'whether the discussion is finished', 'title': 'End Discussion', 'type': 'boolean'}}, 'required': ['speak', 'thought', 'end_discussion'], 'title': 'Schema', 'type': 'object'} -```` - -- During the parsing process, Pydantic will be used for type validation, and an exception will be thrown if the validation fails. Meanwhile, Pydantic also provides some fault tolerance capabilities, such as converting the string `"true"` to Python's `True`: - -```` -parser.parse(""" -```json -{ - "thought": "The others didn't realize I was a werewolf. I should end the discussion soon.", - "speak": "I agree with you.", - "end_discussion": "true" -} -``` -""") -```` - -##### MultiTaggedContentParser - -`MultiTaggedContentParser` asks LLM to generate specific content within multiple tag pairs. The content from different tag pairs will be parsed into a single Python dictionary. Its usage is similar to `MarkdownJsonDictParser`, but the initialization method is different, and it is more suitable for weak LLMs or complex return content. 
- -###### Initialization & Format Instruction Template - -Within `MultiTaggedContentParser`, each tag pair will be specified by a `TaggedContent` object, which contains -- Tag name (`name`), the key in the returned dictionary -- Start tag (`tag_begin`) -- Hint for content (`content_hint`) -- End tag (`tag_end`) -- Content parsing indication (`parse_json`), defaults to `False`. When set to `True`, the parser will automatically add a hint that requires a JSON object between the tags, and the extracted content will be parsed into a Python object via `json.loads` - -```python -from agentscope.parsers import MultiTaggedContentParser, TaggedContent -parser = MultiTaggedContentParser( - TaggedContent( - name="thought", - tag_begin="[THOUGHT]", - content_hint="what you thought", - tag_end="[/THOUGHT]" - ), - TaggedContent( - name="speak", - tag_begin="[SPEAK]", - content_hint="what you speak", - tag_end="[/SPEAK]" - ), - TaggedContent( - name="finish_discussion", - tag_begin="[FINISH_DISCUSSION]", - content_hint="true/false, whether the discussion is finished", - tag_end="[/FINISH_DISCUSSION]", - parse_json=True, # we expect the content of this field to be parsed directly into a Python boolean value - ) -) - -print(parser.format_instruction) -``` - -``` -Respond with specific tags as outlined below, and the content between [FINISH_DISCUSSION] and [/FINISH_DISCUSSION] MUST be a JSON object: -[THOUGHT]what you thought[/THOUGHT] -[SPEAK]what you speak[/SPEAK] -[FINISH_DISCUSSION]true/false, whether the discussion is finished[/FINISH_DISCUSSION] -``` - -###### Parse Function - -- `MultiTaggedContentParser`'s parsing result is a dictionary, whose keys are the values of `name` in the `TaggedContent` objects. -The following is an example of parsing the LLM response in the werewolf game: - -```python -res_dict = parser.parse( - ModelResponse( - text="""As a werewolf, I should keep pretending to be a villager -[THOUGHT]The others didn't realize I was a werewolf. 
I should end the discussion soon.[/THOUGHT] -[SPEAK]I agree with you.[/SPEAK] -[FINISH_DISCUSSION]true[/FINISH_DISCUSSION]""" - ) -) - -print(res_dict) -``` - -``` -{ - "thought": "The others didn't realize I was a werewolf. I should end the discussion soon.", - "speak": "I agree with you.", - "finish_discussion": true -} -``` - -### JSON / Python Object Type - -#### MarkdownJsonObjectParser - -`MarkdownJsonObjectParser` also uses the \```json and \``` tags in Markdown, but does not limit the content type. It can be a list, dictionary, number, string, etc., which can be parsed into a Python object via `json.loads`. - -##### Initialization & Format Instruction Template - -```python -from agentscope.parsers import MarkdownJsonObjectParser - -parser = MarkdownJsonObjectParser( - content_hint="{A list of numbers.}" -) - -print(parser.format_instruction) -``` - -```` -You should respond a json object in a json fenced code block as follows: -```json -{a list of numbers} -``` -```` - -##### Parse Function - -````python -res = parser.parse( - ModelResponse( - text="""Yes, here is the generated list -```json -[1,2,3,4,5] -``` -""") -) - -print(type(res)) -print(res) -```` - -``` - -[1, 2, 3, 4, 5] -``` - -## Typical Use Cases - -### WereWolf Game - -Werewolf game is a classic use case of dictionary parser. In different stages of the game, the same agent needs to generate different identification fields in addition to `"thought"` and `"speak"`, such as whether the discussion is over, whether the seer uses its ability, whether the witch uses the antidote and poison, and voting. - -AgentScope has built-in examples of [werewolf game](https://github.com/modelscope/agentscope/tree/main/examples/game_werewolf), which uses `DictDialogAgent` class and different parsers to achieve flexible target format switching. By using the post-processing function of the parser, it separates "thought" and "speak", and controls the progress of the game successfully. 
-More details can be found in the werewolf game [source code](https://github.com/modelscope/agentscope/tree/main/examples/game_werewolf). - -### ReAct Agent and Tool Usage - -`ReActAgent` is an agent class built for tool usage in AgentScope, based on the ReAct algorithm, and can be used with different tool functions. The tool call, format parsing, and implementation of `ReActAgent` are similar to the parser. For detailed implementation, please refer to the [source code](https://github.com/modelscope/agentscope/blob/main/src/agentscope/agents/react_agent.py). - - -## Customized Parser - -AgentScope provides a base class `ParserBase` for parsers. Developers can inherit from this base class, and implement the `format_instruction` attribute and `parse` method to create their own parser. - -For dictionary type parsing, you can also inherit from the `agentscope.parsers.DictFilterMixin` class to implement post-processing for dictionary type. - -```python -from abc import ABC, abstractmethod - -from agentscope.models import ModelResponse - - -class ParserBase(ABC): - """The base class for model response parser.""" - - format_instruction: str - """The instruction for the response format.""" - - @abstractmethod - def parse(self, response: ModelResponse) -> ModelResponse: - """Parse the response text to a specific object, and stored in the - parsed field of the response object.""" - - # ... -``` diff --git a/docs/sphinx_doc/en/source/tutorial/203-stream.md b/docs/sphinx_doc/en/source/tutorial/203-stream.md deleted file mode 100644 index ef154c745..000000000 --- a/docs/sphinx_doc/en/source/tutorial/203-stream.md +++ /dev/null @@ -1,123 +0,0 @@ -(203-stream-en)= - -# Streaming - -AgentScope supports streaming mode for the following LLM APIs in both **terminal** and **AgentScope Studio**. 
- -| API | Model Wrapper | `model_type` field in model configuration | -|--------------------|---------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------| -| OpenAI Chat API | [`OpenAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_chat"` | -| DashScope Chat API | [`DashScopeChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_chat"` | -| Gemini Chat API | [`GeminiChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/gemini_model.py) | `"gemini_chat"` | -| ZhipuAI Chat API | [`ZhipuAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/zhipu_model.py) | `"zhipuai_chat"` | -| ollama Chat API | [`OllamaChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_chat"` | -| LiteLLM Chat API | [`LiteLLMChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/litellm_model.py) | `"litellm_chat"` | - - -## Setup Streaming Mode - -AgentScope allows users to set up streaming mode in both model configuration and model calling. - -### In Model Configuration - -To use streaming mode, set the stream field to `True` in the model configuration. - -```python -model_config = { - "config_name": "xxx", - "model_type": "xxx", - "stream": True, - # ... -} -``` - -### In Model Calling - -Within an agent, you can call the model with the `stream` parameter set to `True`. -Note the `stream` parameter in the model calling will override the `stream` field in the model configuration. - -```python -class MyAgent(AgentBase): - # ... - def reply(self, x: Optional[Msg, Sequence[Msg]] = None) -> Msg: - # ... - response = self.model( - prompt, - stream=True, - ) - # ... 
-``` - -## Printing in Streaming Mode - -In streaming mode, the `stream` field of a model response will be a generator, and the `text` field will be `None`. -For compatibility with the non-streaming mode, once the `text` field is accessed, the generator in `stream` field will be iterated to generate the full text and store it in the `text` field. -So that even in streaming mode, users can handle the response text in `text` field as usual. - -However, if you want to print in streaming mode, just put the generator in `self.speak` to print the streaming text in the terminal and AgentScope Studio. - -After printing the streaming response, the full text of the response will be available in the `response.text` field. - -```python - def reply(self, x: Optional[Msg, Sequence[Msg]] = None) -> Msg: - # ... - # Use stream=True if you want to set up streaming mode in model calling - response = self.model(prompt) - - # For now, the response.text is None - - # Print the response in streaming mode in terminal and AgentScope Studio (if available) - self.speak(response.stream) - - # After printing, the response.text will be the full text of the response, and you can handle it as usual - msg = Msg(self.name, content=response.text, role="assistant") - - self.memory.add(msg) - - return msg - -``` - -## Advanced Usage - -For users who want to handle the streaming response by themselves, they can iterate the generator and handle the response text in their own way. - -An example of how to handle the streaming response is in the `speak` function of `AgentBase` as follows. -The `log_stream_msg` function will print the streaming response in the terminal and AgentScope Studio (if registered). - -```python - # ... - elif isinstance(content, GeneratorType): - # The streaming message must share the same id for displaying in - # the agentscope studio. 
- msg = Msg(name=self.name, content="", role="assistant") - for last, text_chunk in content: - msg.content = text_chunk - log_stream_msg(msg, last=last) - else: - # ... -``` - -However, they should remember the following points: - -1. When iterating the generator, the `response.text` field will include the text that has been iterated automatically. -2. The generator in the `stream` field will generate a tuple of boolean and text. The boolean indicates whether the text is the end of the response, and the text is the response text until now. -3. To print streaming text in AgentScope Studio, the message id should be the same for one response in the `log_stream_msg` function. - - -```python - def reply(self, x: Optional[Msg, Sequence[Msg]] = None) -> Msg: - # ... - response = self.model(prompt) - - # For now, the response.text is None - - # Iterate the generator and handle the response text by yourself - for last_chunk, text in response.stream: - # Handle the text in your way - # ... - - -``` - -[[Return to the top]](#203-stream-en) diff --git a/docs/sphinx_doc/en/source/tutorial/204-service.md b/docs/sphinx_doc/en/source/tutorial/204-service.md deleted file mode 100644 index 572b7e5af..000000000 --- a/docs/sphinx_doc/en/source/tutorial/204-service.md +++ /dev/null @@ -1,334 +0,0 @@ -(204-service-en)= - -# Tool - -Service function is a set of multi-functional utility tools that can be -used to enhance the capabilities of agents, such as executing Python code, -web search, file operations, and more. -This tutorial provides an overview of the service functions available in -AgentScope and how to use them to enhance the capabilities of your agents. - -## Built-in Service Functions - -The following table outlines the various Service functions by type. These functions can be called using `agentscope.service.{function_name}`. 
- -| Service Scene | Service Function Name | Description | -|-----------------------------|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------| -| Code | `execute_python_code` | Execute a piece of Python code, optionally inside a Docker container. | -| | `NoteBookExecutor` | Compute Execute a segment of Python code in the IPython environment of the NoteBookExecutor, adhering to the IPython interactive computing style. | -| Retrieval | `retrieve_from_list` | Retrieve a specific item from a list based on given criteria. | -| | `cos_sim` | Compute the cosine similarity between two different embeddings. | -| SQL Query | `query_mysql` | Execute SQL queries on a MySQL database and return results. | -| | `query_sqlite` | Execute SQL queries on a SQLite database and return results. | -| | `query_mongodb` | Perform queries or operations on a MongoDB collection. | -| Text Processing | `summarization` | Summarize a piece of text using a large language model to highlight its main points. | -| Web | `bing_search` | Perform bing search | -| | `google_search` | Perform google search | -| | `arxiv_search` | Perform arXiv search | -| | `download_from_url` | Download file from given URL. | -| | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). | -| | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). | -| | `dblp_search_publications` | Search publications in the DBLP database | -| | `dblp_search_authors` | Search for author information in the DBLP database | -| | `dblp_search_venues` | Search for venue information in the DBLP database | -| | `tripadvisor_search` | Search for locations using the TripAdvisor API. | -| | `tripadvisor_search_location_photos` | Retrieve photos for a specific location using the TripAdvisor API. 
| -| | `tripadvisor_search_location_details` | Get detailed information about a specific location using the TripAdvisor API. | -| File | `create_file` | Create a new file at a specified path, optionally with initial content. | -| | `delete_file` | Delete a file specified by a file path. | -| | `move_file` | Move or rename a file from one path to another. | -| | `create_directory` | Create a new directory at a specified path. | -| | `delete_directory` | Delete a directory and all its contents. | -| | `move_directory` | Move or rename a directory from one path to another. | -| | `read_text_file` | Read and return the content of a text file. | -| | `write_text_file` | Write text content to a file at a specified path. | -| | `read_json_file` | Read and parse the content of a JSON file. | -| | `write_json_file` | Serialize a Python object to JSON and write to a file. | -| Multi Modality | `dashscope_text_to_image` | Convert text to image using Dashscope API. | -| | `dashscope_image_to_text` | Convert image to text using Dashscope API. | -| | `dashscope_text_to_audio` | Convert text to audio using Dashscope API. | -| | `openai_text_to_image` | Convert text to image using OpenAI API | -| | `openai_edit_image` | Edit an image based on the provided mask and prompt using OpenAI API | -| | `openai_create_image_variation` | Create variations of an image using OpenAI API | -| | `openai_image_to_text` | Convert image to text using OpenAI API | -| | `openai_text_to_audio` | Convert text to audio using OpenAI API | -| | `openai_audio_to_text` | Convert audio to text using OpenAI API | -| *More services coming soon* | | More service functions are in development and will be added to AgentScope to further enhance its capabilities. | - -Detailed information about each service function can be found in the -[API document](https://modelscope.github.io/agentscope/). 
- -## How to use Service Functions - -AgentScope provides two classes for service functions, -`ServiceToolkit` and `ServiceResponse`. - -### About Service Toolkit - -The use of tools for LLM usually involves five steps: - -1. **Prepare tool functions**. That is, developers should pre-process the -functions by providing necessary parameters, e.g. api key, username, -password, etc. -2. **Prepare instruction for LLM**. A detailed description of these tool -functions is required for the LLM to understand them properly. -3. **Guide LLM how to use tool functions**. A format description for calling -functions is required. -4. **Parse LLM response**. Once the LLM generates a response, -we need to parse it according to the format described in the third step. -5. **Call functions and handle exceptions**. Call the functions, return -the results, and handle exceptions. - -To simplify the above steps and improve reusability, AgentScope introduces -`ServiceToolkit`. It can -- register python functions -- generate tool function descriptions in both string and JSON schema format -- generate usage instruction for LLM -- parse the model response, call the tool functions, and handle exceptions - -#### How to use - -Follow the steps below to use `ServiceToolkit`: - -1. Init a `ServiceToolkit` object and register service functions with necessary -parameters. Take the following Bing search function as an example. - -```python -def bing_search( - question: str, - api_key: str, - num_results: int = 10, - **kwargs: Any, -) -> ServiceResponse: - """ - Search question in Bing Search API and return the searching results - - Args: - question (`str`): - The search query string. - api_key (`str`): - The API key provided for authenticating with the Bing Search API. - num_results (`int`, defaults to `10`): - The number of search results to return. - **kwargs (`Any`): - Additional keyword arguments to be included in the search query. 
- For more details, please refer to - https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/query-parameters - - [omitted for brevity] - """ -``` - -We register the function in a `ServiceToolkit` object by providing `api_key` and `num_results` as necessary parameters. - -```python -from agentscope.service import ServiceToolkit - -service_toolkit = ServiceToolkit() - -service_toolkit.add( - bing_search, - api_key="xxx", - num_results=3 -) -``` - -2. Use the `tools_instruction` attribute to instruct LLM in prompt, or use the `json_schemas` attribute to get the JSON schema format descriptions to construct customized instruction or directly use in model APIs (e.g. OpenAI Chat API). - -````text ->> print(service_toolkit.tools_instruction) -## Tool Functions: -The following tool functions are available in the format of -``` -{index}. {function name}: {function description} -{argument1 name} ({argument type}): {argument description} -{argument2 name} ({argument type}): {argument description} -... -``` - -1. bing_search: Search question in Bing Search API and return the searching results - question (str): The search query string. -```` -````text ->> print(service_toolkit.json_schemas) -{ - "bing_search": { - "type": "function", - "function": { - "name": "bing_search", - "description": "Search question in Bing Search API and return the searching results", - "parameters": { - "type": "object", - "properties": { - "question": { - "type": "string", - "description": "The search query string." - } - }, - "required": [ - "question" - ] - } - } - } -} -```` - -3. Guide LLM how to use tool functions by the `tools_calling_format` attribute. -The ServiceToolkit module requires LLM to return a list of dictionaries in -JSON format, where each dictionary represents a function call. It must -contain two fields, `name` and `arguments`, where `name` is the function name -and `arguments` is a dictionary that maps from the argument name to the -argument value. 
- - -```text ->> print(service_toolkit.tools_calling_format) -[{"name": "{function name}", "arguments": {"{argument1 name}": xxx, "{argument2 name}": xxx}}] -``` - -4. Parse the LLM response and call functions by its `parse_and_call_func` -method. This function takes a string or a parsed dictionary as input. -- When the input is a string, this function will parse it accordingly and execute the function with the parsed arguments. -- If the input is a parsed dictionary, it will call the function directly. - -```python -# a string input -string_input = '[{"name": "bing_search", "arguments": {"question": "xxx"}}]' -res_of_string_input = service_toolkit.parse_and_call_func(string_input) - -# or a parsed dictionary -dict_input = [{"name": "bing_search", "arguments": {"question": "xxx"}}] -# res_of_dict_input is the same as res_of_string_input -res_of_dict_input = service_toolkit.parse_and_call_func(dict_input) - -print(res_of_string_input) -``` -``` -1. Execute function bing_search - [ARGUMENTS]: - question: xxx - [STATUS]: SUCCESS - [RESULT]: ... -``` - -For more specific examples, refer to the `ReActAgent` class in `agentscope.agents`. - -#### Create new Service Function - -A new service function that can be used by `ServiceToolkit` should meet the following requirements: - -1. It should have a well-formatted docstring (Google style is recommended), so that the -`ServiceToolkit` can extract both the function description and the argument descriptions. -2. The name of the service function should be self-explanatory, -so that the LLM can understand the function and use it properly. -3. The typing of the arguments should be provided when defining -the function (e.g. `def func(a: int, b: str, c: bool)`), so that -the agent can specify the arguments properly. - - -### About ServiceResponse - -`ServiceResponse` is a wrapper for the execution results of the services, -containing two fields, `status` and `content`. 
When the Service function -runs to completion normally, `status` is `ServiceExecStatus.SUCCESS`, and -`content` is the return value of the function. When an error occurs during -execution, `status` is `ServiceExecStatus.Error`, and `content` contains -the error message. - -```python -class ServiceResponse(dict): - """Used to wrap the execution results of the services""" - - __setattr__ = dict.__setitem__ - __getattr__ = dict.__getitem__ - - def __init__( - self, - status: ServiceExecStatus, - content: Any, - ): - """Constructor of ServiceResponse - - Args: - status (`ServiceExeStatus`): - The execution status of the service. - content (`Any`) - If the argument`status` is `SUCCESS`, `content` is the - response. We use `object` here to support various objects, - e.g. str, dict, image, video, etc. - Otherwise, `content` is the error message. - """ - self.status = status - self.content = content - - # [omitted for brevity] -``` - -## Example - -```python -import json -import inspect -from agentscope.service import ServiceResponse -from agentscope.agents import AgentBase -from agentscope.message import Msg - -from typing import Optional, Union, Sequence - - -def create_file(file_path: str, content: str = "") -> ServiceResponse: - """ - Create a file and write content to it. - - Args: - file_path (str): The path to the file to be created. - content (str): The content to be written to the file. - - Returns: - ServiceResponse: A boolean indicating success or failure, and a - string containing any error message (if any), including the error type. - """ - # ... [omitted for brevity] - - -class YourAgent(AgentBase): - # ... [omitted for brevity] - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # ... 
[omitted for brevity] - - # construct a prompt to ask the agent to provide the parameters in JSON format - prompt = ( - f"To complete the user request\n```{x['content']}```\n" - "Please provide the necessary parameters in JSON format for the " - "function:\n" - f"Function: {create_file.__name__}\n" - "Description: Create a file and write content to it.\n" - ) - - # add detailed information about the function parameters - sig = inspect.signature(create_file) - parameters = sig.parameters.items() - params_prompt = "\n".join( - f"- {name} ({param.annotation.__name__}): " - f"{'(default: ' + json.dumps(param.default) + ')'if param.default is not inspect.Parameter.empty else ''}" - for name, param in parameters - ) - prompt += params_prompt - - # get the model response - model_response = self.model(prompt).text - - # parse the model response and call the create_file function - try: - kwargs = json.loads(model_response) - create_file(**kwargs) - except: - # Error handling - pass - - # ... [omitted for brevity] -``` - -[[Return to Top]](#204-service-en) diff --git a/docs/sphinx_doc/en/source/tutorial/205-memory.md b/docs/sphinx_doc/en/source/tutorial/205-memory.md deleted file mode 100644 index 788a99c21..000000000 --- a/docs/sphinx_doc/en/source/tutorial/205-memory.md +++ /dev/null @@ -1,223 +0,0 @@ -(205-memory-en)= - -# Memory - -In AgentScope, memory is used to store historical information, allowing the -agent to provide more coherent and natural responses based on context. -This tutorial will first introduce the carrier of information in memory, -message, and then introduce the functions and usage of the memory module in -AgentScope. - -## About Message - -### `MessageBase` Class - -In AgentScope, the message base class is a subclass of Python dictionary, -consisting of two required fields (`name` and `content`) and an optional -field (`url`). 
-Specifically, the `name` field represents the originator of the message, -the `content` field represents the content of the message, and the `url` -field represents the data link attached to the message, which can be a -local link to multi-modal data or a web link. -As a dictionary type, developers can also add other fields -as needed. When a message is created, a unique ID is automatically -generated to identify the message. The creation time of the message is also -automatically recorded in the form of a timestamp. - -In the specific implementation, AgentScope first provides a `MessageBase` -base class to define the basic properties and usage of messages. -Unlike general dictionary types, the instantiated objects of `MessageBase` -can access attribute values through `object_name.{attribute_name}` or -`object_name['attribute_name']`. -The key attributes of the `MessageBase` class are as follows: - -- **`name`**: This attribute denotes the originator of the message. It's a critical piece of metadata, useful in scenarios where distinguishing between different speakers is necessary. -- **`content`**: The substance of the message itself. It can include text, structured data, or any other form of content that is relevant to the interaction and requires processing by the agent. -- **`url`**: An optional attribute that allows the message to be linked to external resources. These can be direct links to files, multi-modal data, or web pages. -- **`timestamp`**: A timestamp indicating when the message was created. -- **`id`**: Each message is assigned a unique identifier (ID) upon creation. - -```python -class MessageBase(dict): - """Base Message class, which is used to maintain information for dialog, - memory and used to construct prompt. 
- """ - - def __init__( - self, - name: str, - content: Any, - url: Optional[Union[Sequence[str], str]] = None, - timestamp: Optional[str] = None, - **kwargs: Any, - ) -> None: - """Initialize the message object - - Args: - name (`str`): - The name of who send the message. It's often used in - role-playing scenario to tell the name of the sender. - However, you can also only use `role` when calling openai api. - The usage of `name` refers to - https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models. - content (`Any`): - The content of the message. - url (`Optional[Union[list[str], str]]`, defaults to None): - A url to file, image, video, audio or website. - timestamp (`Optional[str]`, defaults to None): - The timestamp of the message, if None, it will be set to - current time. - **kwargs (`Any`): - Other attributes of the message. For OpenAI API, you should - add "role" from `["system", "user", "assistant", "function"]`. - When calling OpenAI API, `"role": "assistant"` will be added - to the messages that don't have "role" attribute. - - """ - # id and timestamp will be added to the object as its attributes - # rather than items in dict - self.id = uuid4().hex - if timestamp is None: - self.timestamp = _get_timestamp() - else: - self.timestamp = timestamp - - self.name = name - self.content = content - - if url: - self.url = url - - self.update(kwargs) - - def __getattr__(self, key: Any) -> Any: - try: - return self[key] - except KeyError as e: - raise AttributeError(f"no attribute '{key}'") from e - - def __setattr__(self, key: Any, value: Any) -> None: - self[key] = value - - def __delattr__(self, key: Any) -> None: - try: - del self[key] - except KeyError as e: - raise AttributeError(f"no attribute '{key}'") from e - - def to_str(self) -> str: - """Return the string representation of the message""" - raise NotImplementedError - - def serialize(self) -> str: - """Return the serialized message.""" - raise NotImplementedError - - # ... 
[code omitted for brevity] -``` - -### `Msg` Class - -`Msg` class extends `MessageBase` and represents a standard *message*. -`Msg` provides concrete definitions for the `to_str` and `serialize` -methods to enable string representation and serialization suitable for the -agent's operational context. -Within an `Agent` class, its `reply` function typically returns an instance of -`Msg` to facilitate message passing within AgentScope. - -```python -class Msg(MessageBase): - """The Message class.""" - - def __init__( - self, - name: str, - content: Any, - url: Optional[Union[Sequence[str], str]] = None, - timestamp: Optional[str] = None, - echo: bool = False, - **kwargs: Any, - ) -> None: - super().__init__( - name=name, - content=content, - url=url, - timestamp=timestamp, - **kwargs, - ) - if echo: - logger.chat(self) - - def to_str(self) -> str: - """Return the string representation of the message""" - return f"{self.name}: {self.content}" - - def serialize(self) -> str: - return json.dumps({"__type": "Msg", **self}) -``` - -## About Memory - -### `MemoryBase` Class - -`MemoryBase` is an abstract class that handles an agent's memory in a structured way. It defines operations for storing, retrieving, deleting, and manipulating the content of *messages*. - -```python -class MemoryBase(ABC): - # ... 
[code omitted for brevity] - - def get_memory( - self, - return_type: PromptType = PromptType.LIST, - recent_n: Optional[int] = None, - filter_func: Optional[Callable[[int, dict], bool]] = None, - ) -> Union[list, str]: - raise NotImplementedError - - def add(self, memories: Union[list[dict], dict]) -> None: - raise NotImplementedError - - def delete(self, index: Union[Iterable, int]) -> None: - raise NotImplementedError - - def load( - self, - memories: Union[str, dict, list], - overwrite: bool = False, - ) -> None: - raise NotImplementedError - - def export( - self, - to_mem: bool = False, - file_path: Optional[str] = None, - ) -> Optional[list]: - raise NotImplementedError - - def clear(self) -> None: - raise NotImplementedError - - def size(self) -> int: - raise NotImplementedError -``` - -Here are the key methods of `MemoryBase`: - -- **`get_memory`**: This method is responsible for retrieving stored messages from the agent's memory. It can return these messages in different formats as specified by the `return_type`. The method can also retrieve a specific number of recent messages if `recent_n` is provided, and it can apply a filtering function (`filter_func`) to select messages based on custom criteria. -- **`add`**: This method is used to add a new *message* to the agent's memory. It can accept a single message or a list of messages. Each message is typically an instance of `MessageBase` or its subclasses. -- **`delete`**: This method enables the removal of messages from memory by their index (or indices if an iterable is provided). -- **`load`**: This method allows for the bulk loading of messages into the agent's memory from an external source. The `overwrite` parameter determines whether to clear the existing memory before loading the new set of messages. 
-- **`export`**: This method facilitates exporting the stored *message* from the agent's memory either to an external file (specified by `file_path`) or directly into the working memory of the program (if `to_mem` is set to `True`). -- **`clear`**: This method purges all *message* from the agent's memory, essentially resetting it. -- **`size`**: This method returns the number of messages currently stored in the agent's memory. - -### `TemporaryMemory` - -The `TemporaryMemory` class is a concrete implementation of `MemoryBase`, providing a memory store that exists during the runtime of an agent, which is used as the default memory type of agents. Besides all the behaviors from `MemoryBase`, the `TemporaryMemory` additionally provides methods for retrieval: - -- **`retrieve_by_embedding`**: Retrieves `messages` that are most similar to a query, based on their embeddings. It uses a provided metric to determine the relevance and can return the top `k` most relevant messages. -- **`get_embeddings`**: Return the embeddings for all messages in memory. If a message does not have an embedding and an embedding model is provided, it will generate and store the embedding for the message. - -For more details about the usage of `Memory` and `Msg`, please refer to the API references. - -[[Return to the top]](#205-memory-en) diff --git a/docs/sphinx_doc/en/source/tutorial/206-prompt.md b/docs/sphinx_doc/en/source/tutorial/206-prompt.md deleted file mode 100644 index dc98d6070..000000000 --- a/docs/sphinx_doc/en/source/tutorial/206-prompt.md +++ /dev/null @@ -1,554 +0,0 @@ -(206-prompt-en)= - -# Prompt Engineering - -Prompt engineering is critical in LLM-empowered applications. However, -crafting prompts for large language models (LLMs) can be challenging, -especially with different requirements from various model APIs. - -To ease the process of adapting prompt to different model APIs, AgentScope -provides a structured way to organize different data types (e.g. 
instruction, -hints, conversation history) into the desired format. - -Note there is no **one-size-fits-all** solution for prompt crafting. -**The goal of built-in strategies is to enable beginners to smoothly invoke -the model API, rather than achieve the best performance**. -For advanced users, we highly recommend developers to customize prompts -according to their needs and model API requirements. - -## Challenges in Prompt Construction - -In multi-agent applications, LLM often plays different roles in a -conversation. When using third-party chat APIs, it has the following -challenges: - -1. Most third-party chat APIs are designed for chatbot scenario, and the - `role` field only supports `"user"` and `"assistant"`. - -2. Some model APIs require `"user"` and `"assistant"` must speak alternatively, - and `"user"` must speak in the beginning and end of the input messages list. - Such requirements make it difficult to build a multi-agent conversation - when the agent may act as many different roles and speak continuously. - -To help beginners to quickly start with AgentScope, we provide the -following built-in strategies for most chat and generation related model APIs. - -## Built-in Prompt Strategies - -In AgentScope, we provide built-in strategies for the following chat and -generation model APIs. - -- [OpenAIChatWrapper](#openaichatwrapper) -- [DashScopeChatWrapper](#dashscopechatwrapper) -- [DashScopeMultiModalWrapper](#dashscopemultimodalwrapper) -- [OllamaChatWrapper](#ollamachatwrapper) -- [OllamaGenerationWrapper](#ollamagenerationwrapper) -- [GeminiChatWrapper](#geminichatwrapper) -- [ZhipuAIChatWrapper](#zhipuaichatwrapper) - -These strategies are implemented in the `format` functions of the model -wrapper classes. -It accepts `Msg` objects, a list of `Msg` objects, or their mixture as input. 
-However, `format` function will first reorganize them into a list of `Msg` -objects, so for simplicity in the following sections we treat the input as a -list of `Msg` objects. - -### OpenAIChatWrapper - -`OpenAIChatWrapper` encapsulates the OpenAI chat API. It takes a list of -dictionaries as input, where the dictionary must obey the following rules -(updated in 2024/03/22): - -- Require `role` and `content` fields, and an optional `name` field. -- The `role` field must be either `"system"`, `"user"`, or `"assistant"`. - -#### Prompt Strategy - -##### Non-Vision Models - -In OpenAI Chat API, the `name` field enables the model to distinguish -different speakers in the conversation. Therefore, the strategy of `format` -function in `OpenAIChatWrapper` is simple: - -- `Msg`: Pass a dictionary with `role`, `content`, and `name` fields directly. -- `List`: Parse each element in the list according to the above rules. - -An example is shown below: - -```python -from agentscope.models import OpenAIChatWrapper -from agentscope.message import Msg - -model = OpenAIChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="gpt-4", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi.", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) -print(prompt) -``` - -```bash -[ - {"role": "system", "name": "system", "content": "You are a helpful assistant"}, - {"role": "assistant", "name": "Bob", "content": "Hi."}, - {"role": "assistant", "name": "Alice", "content": "Nice to meet you!"}, -] -``` - -##### Vision Models - -For vision models (gpt-4-turbo, gpt-4o, ...), if the input message contains image urls, the generated `content` field will be a list of dicts, which contains text and image urls. 
- -Specifically, the web image urls will be passed to the OpenAI Chat API directly, while the local image urls will be converted to base64 format. For more details, please refer to the [official guidance](https://platform.openai.com/docs/guides/vision). - -Note that invalid image urls (e.g. `/Users/xxx/test.mp3`) will be ignored. - -```python -from agentscope.models import OpenAIChatWrapper -from agentscope.message import Msg - -model = OpenAIChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="gpt-4o", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="user", content="Describe this image", role="user", url="https://xxx.png"), - Msg(name="user", content="And these images", role="user", url=["/Users/xxx/test.png", "/Users/xxx/test.mp3"]), - ], -) -print(prompt) -``` - -```python -[ - { - "role": "system", - "name": "system", - "content": "You are a helpful assistant" - }, - { - "role": "user", - "name": "user", - "content": [ - { - "type": "text", - "text": "Describe this image" - }, - { - "type": "image_url", - "image_url": { - "url": "https://xxx.png" - } - }, - ] - }, - { - "role": "user", - "name": "user", - "content": [ - { - "type": "text", - "text": "And these images" - }, - { - "type": "image_url", - "image_url": { - "url": "..." # for /Users/xxx/test.png - } - }, - ] - }, -] -``` - -### DashScopeChatWrapper - -`DashScopeChatWrapper` encapsulates the DashScope chat API, which takes a list of messages as input. The message must obey the following rules (updated in 2024/03/22): - -- Require `role` and `content` fields, and `role` must be either `"user"`, - `"system"` or `"assistant"`. -- If `role` is `"system"`, this message must and can only be the first - message in the list. -- The `user` and `assistant` must speak alternately. -- The `user` must speak in the beginning and end of the input messages list. 
- -#### Prompt Strategy - -If the role field of the first message is `"system"`, it will be converted into a single message with the `role` field as `"system"` and the `content` field as the system message. The rest of the messages will be converted into a message with the `role` field as `"user"` and the `content` field as the conversation history. - -An example is shown below: - -```python -from agentscope.models import DashScopeChatWrapper -from agentscope.message import Msg - -model = DashScopeChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="qwen-max", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi!", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) -print(prompt) -``` - -```python -prompt = [ - { - "role": "user", - "content": ( - "You are a helpful assistant\n" - "\n" - "## Conversation History\n" - "Bob: Hi!\n" - "Alice: Nice to meet you!" - ) - }, -] -``` - -### DashScopeMultiModalWrapper - -`DashScopeMultiModalWrapper` encapsulates the DashScope multimodal conversation API, which takes a list of messages as input. The message must obey the following rules (updated in 2024/04/04): - -- Each message is a dictionary with `role` and `content` fields. - - The `role` field must be either `"user"`, `"system"`, or `"assistant"`. - - The `content` field must be a list of dictionaries, where - - Each dictionary only contains one key-value pair, whose key must be `text`, `image` or `audio`. - - `text` field is a string, representing the text content. - - `image` field is a string, representing the image url. - - `audio` field is a string, representing the audio url. - - The `content` field can contain multiple dictionaries with the key `image` or multiple dictionaries with the key `audio` at the same time. 
For example: - -```python -[ - { - "role": "user", - "content": [ - {"text": "What's the difference between these two pictures?"}, - {"image": "https://xxx1.png"}, - {"image": "https://xxx2.png"} - ] - }, - { - "role": "assistant", - "content": [{"text": "The first picture is a cat, and the second picture is a dog."}] - }, - { - "role": "user", - "content": [{"text": "I see, thanks!"}] - } -] -``` - -- The message with the `role` field as `"system"` must and can only be the first message in the list. -- The last message must have the `role` field as `"user"`. -- The `user` and `assistant` messages must alternate. - -#### Prompt Strategy - -Based on the above rules, the `format` function in `DashScopeMultiModalWrapper` will parse the input messages as follows: - -- If the first message in the input message list has a `role` field with the value `"system"`, it will be converted into a system message with the `role` field as `"system"` and the `content` field as the system message. If the `url` field in the input `Msg` object is not `None`, a dictionary with the key `"image"` or `"audio"` will be added to the `content` based on its type. -- The rest of the messages will be converted into a message with the `role` field as `"user"` and the `content` field as the conversation history. For each message, if their `url` field is not `None`, it will add a dictionary with the key `"image"` or `"audio"` to the `content` based on the file type that the `url` points to. 
- -An example: - -```python -from agentscope.models import DashScopeMultiModalWrapper -from agentscope.message import Msg - -model = DashScopeMultiModalWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="qwen-vl-plus", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system", url="url_to_png1"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi!", role="assistant", url="url_to_png2"), - Msg(name="Alice", content="Nice to meet you!", role="assistant", url="url_to_png3"), - ], -) -print(prompt) -``` - -```bash -[ - { - "role": "system", - "content": [ - {"text": "You are a helpful assistant"}, - {"image": "url_to_png1"} - ] - }, - { - "role": "user", - "content": [ - {"text": "## Conversation History\nBob: Hi!\nAlice: Nice to meet you!"}, - {"image": "url_to_png2"}, - {"image": "url_to_png3"}, - ] - } -] -``` - - -### LiteLLMChatWrapper - -`LiteLLMChatWrapper` encapsulates the litellm chat API, which takes a list of -messages as input. The litellm supports different types of models, and each model -might need to obey different formats. To simplify the usage, we provide a format -that could be compatible with most models. If more specific formats are needed, -you can refer to the specific model you use as well as the -[litellm](https://github.com/BerriAI/litellm) documentation to customize your -own format function for your model. - - -- format all the messages in the chat history, into a single message with `"user"` as `role` - -#### Prompt Strategy - -- Messages will consist conversation history in the `user` message prefixed by the system message and "## Conversation History". 
- -```python -from agentscope.models import LiteLLMChatWrapper -from agentscope.message import Msg - -model = LiteLLMChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="gpt-3.5-turbo", -) - -prompt = model.format( - Msg("system", "You are a helpful assistant", role="system"), - [ - Msg("user", "What is the weather today?", role="user"), - Msg("assistant", "It is sunny today", role="assistant"), - ], -) - -print(prompt) -``` - -```bash -[ - { - "role": "user", - "content": ( - "You are a helpful assistant\n" - "\n" - "## Conversation History\n" - "user: What is the weather today?\n" - "assistant: It is sunny today" - ), - }, -] -``` - -### OllamaChatWrapper - -`OllamaChatWrapper` encapsulates the Ollama chat API, which takes a list of -messages as input. The message must obey the following rules (updated in -2024/03/22): - -- Require `role` and `content` fields, and `role` must be either `"user"`, - `"system"`, or `"assistant"`. -- An optional `images` field can be added to the message - -#### Prompt Strategy - -- If the role field of the first input message is `"system"`, -it will be treated as system prompt and the other messages will consist -conversation history in the system message prefixed by "## Conversation History". -- If the `url` attribute of messages is not `None`, we will gather all urls in -the `"images"` field in the returned dictionary. 
- -```python -from agentscope.models import OllamaChatWrapper -from agentscope.message import Msg - -model = OllamaChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="llama2", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi.", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant", url="https://example.com/image.jpg"), - ], -) - -print(prompt) -``` - -```python -[ - { - "role": "system", - "content": ( - "You are a helpful assistant\n" - "\n" - "## Conversation History\n" - "Bob: Hi.\n" - "Alice: Nice to meet you!", - ), - "images": ["https://example.com/image.jpg"] - }, -] -``` - -### OllamaGenerationWrapper - -`OllamaGenerationWrapper` encapsulates the Ollama generation API, which -takes a string prompt as input without any constraints (updated to 2024/03/22). - -#### Prompt Strategy - -If the role field of the first message is `"system"`, a system prompt will be created. The rest of the messages will be combined into conversation history in string format. - -```python -from agentscope.models import OllamaGenerationWrapper -from agentscope.message import Msg - -model = OllamaGenerationWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="llama2", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi.", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) - -print(prompt) -``` - -```bash -You are a helpful assistant - -## Conversation History -Bob: Hi. -Alice: Nice to meet you! -``` - -### `GeminiChatWrapper` - -`GeminiChatWrapper` encapsulates the Gemini chat API, which takes a list of -messages or a string prompt as input. 
Similar to DashScope Chat API, if we -pass a list of messages, it must obey the following rules: - -- Require `role` and `parts` fields. `role` must be either `"user"` - or `"model"`, and `parts` must be a list of strings. -- The `user` and `model` must speak alternatively. -- The `user` must speak in the beginning and end of the input messages list. - -Such requirements make it difficult to build a multi-agent conversation when -an agent may act as many different roles and speak continuously. -Therefore, we decide to convert the list of messages into a user message -in our built-in `format` function. - -#### Prompt Strategy - -If the role field of the first message is `"system"`, a system prompt will be added in the beginning. The other messages will be combined into conversation history. - -**Note** sometimes the `parts` field may contain image urls, which is not -supported in `format` function. We recommend developers to customize the -prompt according to their needs. - -```python -from agentscope.models import GeminiChatWrapper -from agentscope.message import Msg - -model = GeminiChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="gemini-pro", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi!", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) - -print(prompt) -``` - -```python -[ - { - "role": "user", - "parts": [ - "You are a helpful assistant\n" - "## Conversation History\n" - "Bob: Hi!\n" - "Alice: Nice to meet you!" - ] - } -] -``` - -### `ZhipuAIChatWrapper` - -`ZhipuAIChatWrapper` encapsulates the ZhipuAI chat API, which takes a list of messages as input. The message must obey the following rules: - -- Require `role` and `content` fields, and `role` must be either `"user"` - `"system"` or `"assistant"`. -- There must be at least one `user` message. 
- -#### Prompt Strategy - -If the role field of the first message is `"system"`, it will be converted into a single message with the `role` field as `"system"` and the `content` field as the system message. The rest of the messages will be converted into a message with the `role` field as `"user"` and the `content` field as the conversation history. - -An example is shown below: - -```python -from agentscope.models import ZhipuAIChatWrapper -from agentscope.message import Msg - -model = ZhipuAIChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="glm-4", - api_key="your api key", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi!", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) -print(prompt) -``` - -```bash -[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "## Conversation History\nBob: Hi!\nAlice: Nice to meet you!"}, -] -``` - -[[Return to the top]](#206-prompt-en) diff --git a/docs/sphinx_doc/en/source/tutorial/207-monitor.md b/docs/sphinx_doc/en/source/tutorial/207-monitor.md deleted file mode 100644 index df645a61d..000000000 --- a/docs/sphinx_doc/en/source/tutorial/207-monitor.md +++ /dev/null @@ -1,128 +0,0 @@ -(207-monitor-en)= - -# Monitor - -AgentScope supports monitoring the usage of model APIs. -Users can disable the monitor by setting `use_monitor=False` in `agentscope.init` if they don't need this feature. - -To monitor the detailed usage, we provide two functions, `agentscope.state_dict` and `agentscope.print_llm_usage`, -to get the current state of the monitor and print the usage of the model APIs respectively. - -The example code is as follows: - -```python - -import agentscope - -# ... 
- -# Get the current state of the monitor -state_dict = agentscope.state_dict() - -# Print the usage of the model APIs -agentscope.print_llm_usage() -``` - -An example `state_dict` is shown as follows: - -```json -{ - "project": "zSZ0pO", - "name": "7def6u", - "run_id": "run_20240731-104527_7def6u", - "pid": 24727, - "timestamp": "2024-07-31 10:45:27", - "disable_saving": false, - "file": { - "save_log": false, - "save_code": false, - "save_api_invoke": false, - "base_dir": null, - "run_dir": "/xxx/runs/run_20240731-104527_7def6u", - "cache_dir": "/Users/xxx/.cache/agentscope" - }, - "model": { - "model_configs": {} - }, - "logger": { - "level": "INFO" - }, - "studio": { - "active": false, - "studio_url": null - }, - "monitor": { - "use_monitor": true, - "path_db": "/.../runs/run_20240731-104527_7def6u/agentscope.db" - } -} -``` - -- When calling `agentscope.print_llm_usage`, AgentScope will print model usages as follows: - -```text -2024-08-05 15:21:54.889 | INFO | agentscope.manager._monitor:_print_table:117 - Text & Embedding Model: -2024-08-05 15:21:54.889 | INFO | agentscope.manager._monitor:_print_table:127 - | MODEL NAME | TIMES | PROMPT TOKENS | COMPLETION TOKENS | TOTAL TOKENS | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | gpt-4-turbo | 1 | 15 | 20 | 35 | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | gpt-4o | 1 | 43 | 34 | 77 | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | qwen-max | 2 | 129 | 172 | 301 | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:117 - Image Model: -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | MODEL NAME | RESOLUTION | TIMES | IMAGE COUNT | -2024-08-05 15:21:54.891 | INFO | agentscope.manager._monitor:_print_table:127 - | dall-e-3 | hd_1024*1024 | 1 | 2 | -2024-08-05 15:21:54.891 | INFO | agentscope.manager._monitor:_print_table:127 - | dall-e-3 | 
standard_1024*1024 | 2 | 7 | -2024-08-05 15:21:54.891 | INFO | agentscope.manager._monitor:_print_table:127 - | qwen-vl | 1024*1024 | 1 | 4 | -``` - -- You can also get the usage of the model APIs in JSON format as follows: - -```python -# print(json.dumps(agentscope.print_llm_usage(), indent=4)) -{ - "text_and_embedding": [ - { - "model_name": "gpt-4-turbo", - "times": 1, - "prompt_tokens": 15, - "completion_tokens": 20, - "total_tokens": 35 - }, - { - "model_name": "gpt-4o", - "times": 1, - "prompt_tokens": 43, - "completion_tokens": 34, - "total_tokens": 77 - }, - { - "model_name": "qwen-max", - "times": 2, - "prompt_tokens": 129, - "completion_tokens": 172, - "total_tokens": 301 - } - ], - "image": [ - { - "model_name": "dall-e-3", - "resolution": "hd_1024*1024", - "times": 1, - "image_count": 2 - }, - { - "model_name": "dall-e-3", - "resolution": "standard_1024*1024", - "times": 2, - "image_count": 7 - }, - { - "model_name": "qwen-vl", - "resolution": "1024*1024", - "times": 1, - "image_count": 4 - } - ] -} -``` - - -[[Return to the top]](#207-monitor-en) diff --git a/docs/sphinx_doc/en/source/tutorial/208-distribute.md b/docs/sphinx_doc/en/source/tutorial/208-distribute.md deleted file mode 100644 index 234ddb5fa..000000000 --- a/docs/sphinx_doc/en/source/tutorial/208-distribute.md +++ /dev/null @@ -1,469 +0,0 @@ -(208-distribute-en)= - -# Distribution - -To provide better performance and support the concurrent of more agents, AgentScope implements a parallel/distributed mode based on the Actor model. Compared to the traditional single-process mode, it has the following characteristics: - -- **High Performance**: Different agents and other services within the same application can run on different processes or even different machines, fully utilizing computing resources to unleash performance. -- **Automatic Parallelization**: Based on the Actor model, each agent has an independent state. 
When implementing applications, there's no need to consider invocation order, resource competition, etc., enabling automatic application parallelization. -- **Zero Migration Cost**: The code is fully compatible with the single-machine mode. Applications that can run in single-process mode can be migrated to the distributed mode at zero cost. - -This section will detail the usage of AgentScope's distributed mode and introduce its principles. - -(basic_usage-en)= - -## Basic Usage - -The distributed mode requires almost no modification to the running code compared to the traditional mode. Simply call the {func}`to_dist` function during the agent initialization phase. - -```python -# import some packages - -# init agentscope - -# Initialization in traditional mode -# agent = Agent(...) - -# Initialization in distributed mode -agent = Agent(...).to_dist() - -x = Msg(...) -y = agent(x) -``` - -In this section, we will demonstrate how to specifically use AgentScope's distributed mode with a webpage retrieval example. To highlight the acceleration effect brought by AgentScope's distributed mode, a simple custom `WebAgent` is used here. -This agent simulates the process of crawling webpages and looking for answers by sleeping for 5 seconds. In the example, there are a total of 5 agents, each crawling a webpage and searching for answers. - -The only difference between the traditional mode and the distributed mode lies in the initialization phase, specifically in `init_without_dist` and `init_with_dist`. -The only difference in `init_with_dist` compared to `init_without_dist` is the additional call to the `to_dist` function. -After initialization, the `run` function is exactly the same for both modes. However, the running time differs significantly between the two modes. 
- -```python -# Please do not run this code in a Jupyter notebook -# Copy the code to a `dist_main.py` file and run it using `python dist_main.py` -# Ensure you have installed the distributed version of agentscope before running the code -# pip install agentscope[distribute] - -import time -import agentscope -from agentscope.agents import AgentBase -from agentscope.message import Msg - -class WebAgent(AgentBase): - - def __init__(self, name): - super().__init__(name) - - def get_answer(self, url: str, query: str): - """Simulate crawling the web and looking for answers""" - time.sleep(5) - return f"Answer from {self.name}" - - def reply(self, x: dict = None) -> dict: - return Msg( - name=self.name, - role="assistant", - content=self.get_answer(x.content["url"], x.content["query"]) - ) - - -QUERY = "example query" -URLS = ["page_1", "page_2", "page_3", "page_4", "page_5"] - -def init_without_dist(): - return [WebAgent(f"W{i}") for i in range(len(URLS))] - - -def init_with_dist(): - return [WebAgent(f"W{i}").to_dist() for i in range(len(URLS))] - - -def run(agents): - start = time.time() - results = [] - for i, url in enumerate(URLS): - results.append(agents[i].reply( - Msg( - name="system", - role="system", - content={ - "url": url, - "query": QUERY - } - ) - )) - for result in results: - print(result.content) - end = time.time() - return end - start - - -if __name__ == "__main__": - agentscope.init() - start = time.time() - simple_agents = init_without_dist() - dist_agents = init_with_dist() - end = time.time() - print(f"Time taken for initialization: {end - start}") - print(f"Time taken without distributed mode: {run(simple_agents)}") - print(f"Time taken with distributed mode: {run(dist_agents)}") -``` - -Sample output of the above code is as follows: - -```text -Time taken for initialization: 12.944042921066284 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -Time taken without 
distributed mode: 25.022241830825806 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -Time taken with distributed mode: 5.021369934082031 -``` - -As observed from the output, there is a significant reduction in running time when using the distributed mode (from 25 seconds to 5 seconds). -The example above represents the most common usage of AgentScope's distributed mode. When not aiming for ultimate performance or the number of Agents is relatively small (e.g., no more than 10), it is advisable to use the method demonstrated above. -For further performance optimization, a deeper understanding of AgentScope's distributed model is required, and subsequent sections will introduce advanced usage of the distributed mode in detail. - -## Advanced Usage - -This section will introduce advanced uses of the AgentScope distributed mode to further enhance efficiency. Before delving into advanced usage, we need to have a basic understanding of the fundamental concepts of the AgentScope distributed mode. - -### Fundamental Concepts - -- **Main Process**: The process where the AgentScope application resides is called the main process. For instance, the `run` function in the example from the previous section runs in the main process. Each AgentScope application will have only one main process. -- **Agent Server Process**: In distributed mode, the agent server process is where agents run. For example, in the example from the previous section, all agents in `dist_agents` actually run in the agent server process. Multiple agent server processes can exist at the same time. Agent server processes can run on any network-accessible node, and within each agent server process, multiple agents can run simultaneously. - -- **Child Mode**: In child mode, the agent server process is spawned as a child process by the main process. 
In the example from the previous section, each agent in `dist_agents` is actually a child process of the main process. This mode is the default running mode for AgentScope distributed applications, meaning that when calling the `to_dist` function without any parameters, it defaults to this mode. This mode is employed in the [basic usage](#basic_usage-en) section. -- **Independent Mode**: In independent mode, the agent processes are independent of the main process. The agent processes need to be started on the machine in advance, and certain parameters need to be passed to the `to_dist` function. This mode must be used if agents need to be deployed across different machines. Additionally, this mode is recommended if performance is a major concern or if you have a large number of agents. - -### Using Independent Mode - -Compared to child mode, independent mode can avoid the overhead of initializing child processes during runtime, thereby eliminating startup latency and enhancing operational efficiency in scenarios with many agents. - -In independent mode, agent server processes need to be started in advance on the machines, and the `host` and `port` of the agent server process to connect to should be passed to the `to_dist` function. - -We will still use the example from the basic usage section for demonstration. Assuming the code file from the [basic usage](#basic_usage-en) section is named `dist_main.py`, the following code should be saved as `dist_server.py`. - -```python -# Do not run this code in a Jupyter notebook -# Copy the code to a file named `dist_server.py` and run it using the command `python dist_server.py`.
The directory structure should be: -# your_project_dir -# ├── dist_main.py -# └── dist_server.py -# Install the distributed version of agentscope before running the code -# pip install agentscope[distribute] - -import agentscope -from agentscope.server import RpcAgentServerLauncher -from dist_main import WebAgent - -if __name__ == "__main__": - agentscope.init( - # model_configs=... # Model configuration. If no model is needed, this parameter can be omitted. - ) - assistant_server_launcher = RpcAgentServerLauncher( - host="localhost", - port=12345, - custom_agent_classes=[WebAgent], - ) - assistant_server_launcher.launch(in_subprocess=False) - assistant_server_launcher.wait_until_terminate() -``` - -In the above code, we use `RpcAgentServerLauncher` to start an agent server process. Note that `WebAgent` is not an agent implementation provided by AgentScope, so it needs to be added to `custom_agent_classes`. Additionally, if model APIs are required in the agent server process, corresponding model parameters should be configured in `agentscope.init`. - -Furthermore, the `init_with_dist` function in `dist_main.py` needs to be updated to the following code: - -```python -def init_with_dist(): - return [WebAgent(f"W{i}").to_dist(host="localhost", port=12345) for i in range(len(URLS))] -``` - -In this new version of `init_with_dist`, two new parameters, `host` and `port`, are added to connect to the agent server process. - -After modifying the code, run the `dist_server.py` file in one command line and wait for it to start successfully. Then run the `dist_main.py` file in another command line. 
During execution, the following output will be displayed: - -```text -Initialization time: 0.005397319793701172 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -Non-distributed mode runtime: 25.023009061813354 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -Distributed mode runtime: 5.021481990814209 -``` - -At this point, the initialization time of `dist_main.py` will be significantly reduced, for instance, just 0.005 seconds in this case. - -### Avoiding Repeated Initialization - -The above code calls the `to_dist` function on an already initialized agent. `to_dist` essentially clones the original agent to the agent server process, retaining an {class}`RpcObject` in the main process as a proxy for the original agent. Calls to this `RpcObject` are forwarded to the corresponding agent in the agent server process. - -This process has a potential issue: the original agent is initialized twice, once in the main process and once in the agent server process. These two initializations occur sequentially, lacking the ability to be parallelized. For agents with low initialization costs, directly calling the `to_dist` function will not significantly impact performance. However, for agents with high initialization costs, repeated initialization should be avoided. Therefore, AgentScope distributed mode provides another method for initializing in distributed mode, which entails passing the `to_dist` parameter directly within the initialization function of any agent. The following code modifies the `init_with_dist` function in `dist_main.py`. - -- For child mode, simply pass `to_dist=True` in the initialization function. 
- - ```python - def init_with_dist(): - return [WebAgent(f"W{i}", to_dist=True) for i in range(len(URLS))] - ``` - -- For independent mode, pass the parameters previously given to the `to_dist` function as a dictionary to the `to_dist` field. - - ```python - def init_with_dist(): - return [WebAgent(f"W{i}", to_dist={"host": "localhost", "port": "12345"}) for i in range(len(URLS))] - ``` - -```{note} -Some IDEs might display a hint indicating that the `to_dist` parameter does not exist, but this will not cause an error at runtime. -Additionally, if the `to_dist` parameter has already been passed in the initialization parameters, the `to_dist` method should not be called again. -``` - -## Developer Guide - -```{note} -This section is aimed at developers who are developing new features based on the AgentScope distributed mode. It requires a certain understanding of distributed programming principles such as processes, threads, synchronization, asynchronicity, gRPC, Python metaclasses, and the Global Interpreter Lock (GIL). Even if you lack the aforementioned background, reading this section will still provide insights into the fundamental principles and advanced usages of the AgentScope distributed mode. -``` - -The core logic of the AgentScope distributed model is: - -**By using the `to_dist` function or initialization parameters, objects that originally run in any Python process are transferred to an RPC server. In the original process, an `RpcObject` proxy is retained, and any function call or attribute access on this `RpcObject` will be forwarded to the object on the RPC server. When calling functions, you can decide whether to use synchronous or asynchronous invocation.** - -The following graph illustrates the workflow of `to_dist`, synchronous and asynchronous invocation.
- -```{mermaid} -sequenceDiagram - User -->> Process: initialize - Process -->> RPC Server: to_dist - User -->> Process: sync function call - Process -->> RPC Server: sync function call - RPC Server -->> RPC Server: calculate result - RPC Server -->> Process: sync result - Process -->> User: sync result - User -->> Process: async function call - Process -->> RPC Server: async function call - RPC Server -->> RPC Server: calculate result - User -->> Process: get async result - Process -->> RPC Server: get async result - RPC Server -->> Process: async result - Process -->> User: async result -``` - -As illustrated in the previous figure, the distributed mode of AgentScope essentially follows a Client-Server architecture. In this setup, the user-authored agent applications (Processes) act as the Client, while the agent server process (RPC Server) functions as the Server. In distributed mode, the Client side sends the local agents to the Server side for execution. The Client forwards local function calls and property accesses to the Server, which is responsible for receiving the agents and handling various invocation requests from the Client. - -```{note} -Communication between the Client and Server in AgentScope's distributed mode is implemented using gRPC. There is a strict limitation on the size of messages sent and received; by default, a single message cannot exceed 32 MB. This value can be further increased by modifying the `_DEFAULT_RPC_OPTIONS` parameter in `src/agentscope/constants.py`. -``` - -Next, we'll introduce the implementation of the Client and Server respectively. - -### Client Side - -The Client Side mainly consists of two primary classes: `RpcMeta` and `RpcObject`. `RpcMeta` is responsible for sending local objects to the Server, while `RpcObject` handles the forwarding of subsequent invocation requests.
- -#### `RpcMeta` - -The class {class}`RpcMeta` is a metaclass that automatically adds the `to_dist` method and `to_dist` initialization parameter to its subclasses (thus IDEs might indicate `to_dist` parameter does not exist, but in actuality, it won't cause an error during runtime). Its implementation can be found in `src/agentscope/rpc/rpc_meta.py`. - -Calling the `to_dist` method on an already initialized object sends the object's initialization parameters to the Agent Server Process and reinitializes the object within that process. The main process returns a `RpcObject` to replace the original object. - -Since the original object is reconstructed using initialization parameters, it cannot maintain state changes that occurred after creation. Thus, it is recommended to call the `to_dist` method immediately upon initialization or pass the `to_dist` parameter directly in the object's initialization function. - -Since `to_dist` is automatically added to subclasses by `RpcMeta`, any class that inherits from `RpcMeta`, not just `Agent` classes, can use the `to_dist` method. - -In addition to providing the `to_dist` method, `RpcMeta` also records callable methods and attributes from the original object to facilitate invocation within the `RpcObject`. By default, only public methods of the original object are recorded and invoked synchronously (the caller is blocked until the method on the original object has finished executing). If asynchronous invocation is needed, the `async_func` decorator should be added to the method declaration. - -#### `async_func` and `AsyncResult` - -The decorator {func}`async_func` is implemented in `src/agentscope/rpc/rpc_meta.py`. The `__call__` and `reply` methods of `AgentBase` and all its subclasses are marked with `async_func` to avoid blocking. - -In contrast to `async_func`, there is also the {func}`sync_func` decorator, which is used to mark synchronous methods. 
However, since synchronous methods are the default, they generally do not need to be explicitly marked. - -Below is a simple example where we declare a class `Example`. In this class, `sync_method` is a synchronous method, `async_method_basic` and `async_method_composite` are marked as asynchronous methods, and `_protected_method` is a protected (non-public) method. - -```python -import time -from agentscope.rpc import RpcMeta, async_func - -class Example(metaclass=RpcMeta): - - # @sync_func # Default is sync_func, can be omitted - def sync_method(self) -> str: - # Synchronous method, caller will be blocked for 1 s - time.sleep(1) - return "sync" - - @async_func - def async_method_basic(self) -> str: - # Asynchronous method, caller will not be blocked and can continue until attempting to get the result - time.sleep(1) - # Return a basic type - return "async" - - @async_func - def async_method_composite(self) -> dict: - # Asynchronous method - time.sleep(1) - # Return a dictionary - return {"a": 1, "b": 2, "c": "hello world"} - - def _protected_method(self) -> str: - # Not a public method, rpc object cannot call this method - time.sleep(1) - return "protected" - -if __name__ == "__main__": - example = Example(to_dist=True) - # Calling protected method will result in undefined behavior, avoid using it - # protected_result = example._protected_method() - t1 = time.time() - sync_result = example.sync_method() - assert sync_result == "sync" - t2 = time.time() - print(f"Sync func cost: {t2 - t1} s") - t3 = time.time() - async_basic = example.async_method_basic() - async_composite = example.async_method_composite() - t4 = time.time() - print(f"Async func cost: {t4 - t3} s") - # Basic type results need to call the result method to get the asynchronous execution result - assert async_basic.result() == "async" - # Composite types automatically update asynchronous execution results when accessing required fields - assert async_composite["a"] == 1 - assert async_composite["b"] == 2 - assert
async_composite["c"] == "hello world" -``` - -The result of running the above code sample is shown below. You can observe that the time taken to call the two asynchronous methods (`async_method_basic` and `async_method_composite`) is much shorter than the time taken to call `sync_method`. This is because the asynchronous methods do not block the caller, whereas `sync_method` is synchronous and blocks the caller. - -```text -Sync func cost: 1.0073761940002441 s -Async func cost: 0.0003597736358642578 s -``` - -In the above code, `async_method_basic` and `async_method_composite` return instances of the {class}`AsyncResult` class. This object can return the result of asynchronous execution through its `result` method. To maintain a consistent interface between asynchronous and synchronous calls, if the result represented by `AsyncResult` is a composite type, you do not need to call the `result` method manually. When accessing internal attributes, `result` is automatically called to update the execution result (as shown in the example for `async_composite`). - -#### `RpcObject` - -{class}`RpcObject` is implemented in `src/agentscope/rpc/rpc_object.py`. -`RpcObject` acts as a proxy and does not contain any attribute values or methods of the original object. It only records the address of the agent server process where the original object resides and the object's `id`. With these parameters, `RpcObject` can connect to the original object over the network, enabling invocation on the original object. - -When a user calls methods or accesses attributes on a `RpcObject`, `RpcObject` will forward the request to the original object located in the agent server process through its `__getattr__` method. For synchronous method invocations (`@sync_func`) or attribute access, `RpcObject` will block the caller until the method on the original object completes execution and returns the result. In the case of asynchronous methods (`@async_func`), it immediately returns an {class}`AsyncResult` object.
The main process can continue running without blocking if it doesn't access the specific value of this object. To obtain the execution result, the `result` method of the `AsyncResult` object needs to be called, which will block the caller if the result has not yet been returned. - -```{note} -When initializing `RpcObject`, if `host` and `port` parameters are not provided (i.e., sub-process mode), a new Agent Server process is started and the original object is recreated in that process. Starting a new Agent Server process is relatively slow, which is why initialization time is longer in sub-process mode. -If `host` and `port` parameters are provided (i.e., standalone process mode), `RpcObject` directly connects to the server and recreates the original object, avoiding the overhead of starting a new process. -``` - -### Server-Side - -The server side is primarily based on gRPC and mainly consists of the `AgentServerServicer` and `RpcAgentServerLauncher` classes. - -#### `AgentServerLauncher` - -The implementation of `AgentServerLauncher` is located at `src/agentscope/server/launcher.py`, and it is used to launch the gRPC Server process. Specifically, to ensure that the server process can correctly reinitialize the objects sent from the client side and correctly call the model API services, it is necessary to register all subclasses of `RpcMeta` that may be used during runtime when launching the server, and properly set the model configurations. There are two ways to launch the server: through python code or command-line instructions. - -- The method to launch through python code is as follows. You need to specify `host` and `port`, as well as `custom_agent_classes`, and you also need to pass the required model configurations when calling `agentscope.init`. Suppose there are custom classes `AgentA`, `AgentB`, and `AgentC` that need to be registered, and all three classes are located in the `myagents.py` file and are subclasses of `AgentBase`. 
- - ```python - import agentscope - from agentscope.server import RpcAgentServerLauncher - from myagents import AgentA, AgentB, AgentC - - MODEL_CONFIGS = {} - - HOST = "localhost" - PORT = 12345 - CUSTOM_CLASSES = [AgentA, AgentB, AgentC] - - if __name__ == "__main__": - agentscope.init( - model_configs=MODEL_CONFIGS, - ) - launcher = RpcAgentServerLauncher( - host=HOST, - port=PORT, - custom_agent_classes=CUSTOM_CLASSES, - ) - launcher.launch(in_subprocess=False) - launcher.wait_until_terminate() - ``` - -- The method to launch through command line is as follows. In addition to specifying `host` and `port`, you also need to specify `model_config_path` and `agent_dir`, which correspond to the model configuration file path and the directory where custom agent classes are located, respectively. When installing `agentscope`, the `as_server` command will be installed by default, so you can directly use this command in the command line. - - ```shell - as_server start --host localhost --port 12345 --model-config-path model_config_path --agent-dir parent_dir_of_myagents.py - ``` - -```{warning} -`AgentServerLauncher` will load and execute custom Python objects. Please thoroughly inspect the objects being loaded before use, as they might contain malicious code that could cause severe system damage. The `AgentServerLauncher` class also has a `local_mode` parameter indicating whether only local access is allowed. It defaults to `True`. If access from other machines is required, it should be set to `False`. To avoid network attacks, please only use it in a trusted network environment. -``` - -#### `AgentServerServicer` - -The implementation of `AgentServerServicer` is located at `src/agentscope/server/servicer.py`. It is the implementation of the gRPC service responsible for receiving and processing various requests sent from the client side. - -The `create_agent` method is called when the client uses `to_dist` on an object of a subclass of `RpcMeta`. 
It recreates the original object on the server and stores it in the `agent_pool` field with `id` as the key. - -The `call_agent_func` method is called when the client calls methods or properties on `RpcObject` objects. The input parameters include the `id` of the object being called and the name of the method being called. The specific calling process varies slightly. For synchronous methods and property access, `call_agent_func` retrieves the object from `agent_pool`, calls the corresponding method or property, and blocks the caller until it returns the result. For asynchronous methods, `call_agent_func` packages the input parameters and places them in a task queue, immediately returning the task's `task_id` to avoid blocking the caller. - -The `AgentServerServicer` has an executor pool to automatically execute tasks (`_process_task`). The results of these tasks are then placed into a `result_pool`. The `result` method of `AsyncResult` attempts to fetch the corresponding task result from the `result_pool`. If the task result does not exist, it will block the caller until the result is available. - -##### `executor` - -The executor is a thread pool (`concurrent.futures.ThreadPoolExecutor`), with the number of threads determined by the `capacity` parameter. The setting of `capacity` greatly impacts performance and needs to be tailored based on specific tasks. -To enable concurrent execution of various agents within the server, it is best to ensure that the `capacity` is greater than the number of agents running simultaneously in `AgentServerServicer`. Otherwise, this may lead to exponential increases in execution time, or even deadlocks in certain scenarios (such as recursive calls among multiple agents). - -The `capacity` parameter can be specified in the `as_server` command via `--capacity`, or directly during the initialization of `RpcAgentServerLauncher`. - -```python -# ... 
-launcher = RpcAgentServerLauncher( - host="localhost", - port=12345, - custom_agent_classes=[], - capacity=10, -) -``` - -```shell -as_server start --host localhost --port 12345 --model-config-path model_config_path --agent-dir parent_dir_of_myagents --capacity 10 -``` - -##### `result_pool` - -The `ResultPool` implementation is located in `src/agentscope/server/async_result_pool.py` and is used for managing the execution results of asynchronous methods. There are currently two implementations: `local` and `redis`. The `local` implementation is based on Python's dictionary type (`dict`), whereas the `redis` implementation is based on Redis. Both implementations include automatic deletion mechanisms to prevent results from consuming too much memory. The `local` implementation allows for timeout-based deletion (`max_expire_time`) or deletion when a certain number of items is exceeded (`max_len`), while the `redis` implementation only supports timeout-based deletion (`max_expire_time`). -During the startup of `AgentServerLauncher`, you can specify which implementation to use by passing in the `pool_type` parameter, with the default being `local`. -If `redis` is specified, you must also provide the `redis_url`. Below are examples of code and command-line usage. - -```python -# ... 
-launcher = RpcAgentServerLauncher( - host="localhost", - port=12345, - custom_agent_classes=[], - pool_type="redis", - redis_url="redis://localhost:6379", - max_expire_time=7200, # 2 hours -) -``` - -```shell -as_server start --host localhost --port 12345 --model-config-path model_config_path --agent-dir parent_dir_of_myagents --pool-type redis --redis-url redis://localhost:6379 --max-expire-time 7200 -``` - -[[Back to the top]](#208-distribute-en) diff --git a/docs/sphinx_doc/en/source/tutorial/209-gui.md b/docs/sphinx_doc/en/source/tutorial/209-gui.md deleted file mode 100644 index 2b62a8e83..000000000 --- a/docs/sphinx_doc/en/source/tutorial/209-gui.md +++ /dev/null @@ -1,210 +0,0 @@ -(209-gui-en)= - -# AgentScope Studio - -AgentScope Studio is an open sourced Web UI toolkit for building and monitoring multi-agent applications. -It provides the following features: - -- **Dashboard**: A user-friendly interface, where you can monitor your running applications, and look through the running histories. -- **Workstation**: A powerful interface to build your multi-agent applications with **Dragging & Dropping**. -- **Server Manager**: An easy-to-use monitoring and management tool for managing large-scale distributed applications. -- **Gallery**: Coming soon! - -## Start AgentScope Studio - -To start a studio, first ensure you have installed the latest version of AgentScope. -Then, you can simply run the following Python code: - -```python -import agentscope - -agentscope.studio.init() -``` - -Or you can run the following command in the terminal: - -```bash -as_studio -``` - -After that, you can visit AgentScope studio at `http://127.0.0.1:5000`. 
- -Of course, you can change the host and port, and link to your application running histories by providing the following arguments: - -```python -import agentscope - -agentscope.studio.init( - host="127.0.0.1", # The IP address of AgentScope studio - port=5000, # The port number of AgentScope studio - run_dirs = [ # The directories of your running histories - "xxx/xxx/runs", - "xxx/xxx/runs" - ] -) -``` - -## About Dashboard - -Dashboard is a web interface to monitor your running applications and look through the running histories. - -### Note - -Currently, Dashboard has the following limitations, and we are working on improving it. Any feedback, contributions, or suggestions are welcome! - -- The running application and AgentScope Studio must be running on the same machine for URL/path consistency. If you want to visit AgentScope Studio from another machine, you can try to forward the port to the remote machine by running the following command on the remote machine: - - ```bash - # Supposing AgentScope is running on {as_host}:{as_port}, and the port - # of the remote machine is {remote_machine_port} - ssh -L {remote_machine_port}:{as_host}:{as_port} [{user_name}@]{as_host} - ``` - -- For distributed applications, the single-machine & multi-process mode is supported, but the multi-machine multi-process mode is not supported yet. - -### Register Running Application - -After starting the AgentScope Studio, you can register your running applications by specifying `studio_url` in `agentscope.init()`: - -```python -import agentscope - -agentscope.init( - # ... - project="xxx", - name="xxx", - studio_url="http://127.0.0.1:5000" # The URL of AgentScope Studio -) -``` - -After registering, you can view the running application in the Dashboard. To distinguish different applications, you can specify the `project` and `name` of the application.
- -> Note: Once you register the running application, the input operation within the `agentscope.agents.UserAgent` class will be transferred to the Dashboard in AgentScope Studio, and you can enter the input in the Dashboard. - -### Import Running Histories - -In AgentScope, the running histories are saved in the `./runs` directory by default. If you want to view these running histories in the Dashboard, you can specify the `run_dirs` in `agentscope.studio.init()`: - -```python -import agentscope - -agentscope.studio.init( - run_dirs = ["xxx/runs"] -) -``` - -## About Workstation - -The workstation is designed to empower zero-code users. It facilitates the creation of complex applications through a user-friendly, drag-and-drop interface. - -> Note: We are actively developing the workstation, and the interface may continue to change. Any feedback, contributions, or suggestions are welcome! - -### Quick Start - -In AgentScope Studio, click the workstation icon in the sidebar or welcome page to enter the workstation. -The workstation consists of a sidebar, a central workspace, and a top toolbox. Their functionalities are as follows: - -- **Sidebar**: Providing pre-built examples to help you become acquainted with the workstation, and draggable components for building applications. -- **Central workspace**: The main area where you can drag and drop components to build your application. -- **Top toolbox**: To import, export, check, and run your application. - -

-agentscope-logo -

- -#### Explore Built-in Examples - -For beginners, we highly recommend starting with the pre-built examples. -You have the option to directly click on an example to import it into your central workspace. Alternatively, for a more structured learning experience, you can opt to follow along with the tutorials linked to each example. These tutorials will walk you through how each multi-agent application is built on AgentScope Workstation step-by-step. - -#### Build Your Application - -To build an application, follow these steps: - -- **Choose & drag component**: Click and drag your chosen component from sidebar into the central workspace area. -- **Connect nodes**: Most nodes come with input and output points. Click on an output point of one component and drag it to an input point of another to create a message flow pipeline. This process allows different nodes to pass messages. -- **Configure nodes**: After dropping your nodes into the workspace, click on any of them to fill in their configuration settings. You can customize the prompts, parameters, and other properties. - -#### Run Your Application - -Once the application is built, click on the "Run" button. -Before running, the workstation will check your application for any errors. If there are any, you will be prompted to correct them before proceeding. -After that, your application will be executed in the same Python environment as the AgentScope Studio, and you can find it in the Dashboard. - -#### Import or Export Your Application - -Workstation supports importing and exporting your application. -Click the "Export HTML" or "Export Python" button to generate code that you can distribute to the community or save locally. -If you want to convert the exported code to Python, you can compile the JSON configuration to Python code as follows: - -```bash -# Compile -as_workflow config.json --compile ${YOUR_PYTHON_SCRIPT_NAME}.py -``` - -Want to edit your application further?
Simply click the "Import HTML" button to upload your previously exported HTML code back into the AgentScope Workstation. - -#### Check Your Application - -After building your application, you can click the "Check" button to verify the correctness of your application structure. The following checking rules will be performed: - -- Presence of Model and Agent: Every application must include at least one model node and one agent node. -- Single Connection Policy: A component should not have more than one connection for each input. -- Mandatory Fields Validation: All required input fields must be populated to ensure that each node has the necessary args to operate correctly. -- Consistent Configuration Naming: The ‘Model config name’ used by Agent nodes must correspond to a ‘Config Name’ defined in a Model node. -- Proper Node Nesting: Nodes like ReActAgent should only contain the tool nodes. Similarly, Pipeline nodes like IfElsePipeline should contain the correct number of elements (no more than 2), and ForLoopPipeline, WhileLoopPipeline, and MsgHub should follow the one-element-only rule (must be a SequentialPipeline as a child node). - -## About Server Manager - -> AgentScope [Distribution](#208-distribute-en) is required for understanding the content of this section. - -Server Manager is a web interface used to monitor and manage agent server processes (Servers) and large-scale distributed multi-agent applications. - -### Registering Server Process - -The process can be registered by passing the `studio_url` parameter when initializing `RpcAgentServerLauncher`. - -```python -# import some packages -server = RpcAgentServerLauncher( - # ... - studio_url="http://studio_ip:studio_port", # connect to AgentScope Studio -) -``` - -For more specific registration methods, please refer to the *Connecting to AgentScope Studio* section in [Distribution](#208-distribute-en). 
- -### Managing Server Process - -You can enter the Server Manager page from the AgentScope Studio home page or via the Server Manager button on the sidebar. The current Server Manager page consists of three parts: Servers list, Agents list, and Memory list. - -

-agentscope-manager -

- -#### Servers List - -The AgentScope agent server processes (Servers) registered with the Studio will be displayed on the Servers list in the Server Manager page. The list will display not only the `ID`, `Hostname`, `Port`, and `Created Time` of each Server but also the status and computational resource usage of each Server, including `Status`, `CPU Usage`, `Memory Usage`. - -The `Status` includes the following types: - - `running`: Indicates that the Server is running. - - `dead`: Indicates that the Server has stopped running. - - `unknown`: Indicates that the Studio service cannot currently be accessed normally. - -Only Servers in `running` status will display CPU and memory usage. Users can refresh the Servers list by clicking the refresh button on the left of the Servers bar, and all Servers in `dead` status can be deleted at once by clicking the delete button on the right of the Servers bar. - -The last column of each row in the Servers list provides a delete button for closing and deleting the Server. Please note that this operation is irreversible and should be used with caution. - -#### Agents List - -By clicking on any Server row with `running` status, the Agents list will expand in the page, displaying all Agents under that Server. The list will display the `ID`, `Name`, `Class`, `System Prompt`, and `Model` of each Agent. - -The Agents list can also be refreshed by the user clicking the refresh button on the left of the Agents bar. Each Agent can be deleted by clicking the delete button on the rightmost side of its row, and all Agents within a Server can be deleted in bulk using the delete button on the right side of the Agents bar. As with the deletion of Servers, these operations are irreversible and should be used cautiously. - -#### Memory List - -By clicking on any Agent row, the Memory list will expand on the page, displaying all messages within the Agent's memory. 
Each message will display its `Name` and `Role` attributes on the left, and clicking on a message will display its content on the right side of the list. -Similarly, the current Memory list can be refreshed by clicking the refresh button on the left of the Memory bar. - -[[Back to the top]](#209-gui-en) diff --git a/docs/sphinx_doc/en/source/tutorial/209-prompt_opt.md b/docs/sphinx_doc/en/source/tutorial/209-prompt_opt.md deleted file mode 100644 index f1db3a248..000000000 --- a/docs/sphinx_doc/en/source/tutorial/209-prompt_opt.md +++ /dev/null @@ -1,440 +0,0 @@ -(209-prompt-opt)= - - -# System Prompt Optimization - -AgentScope implements a module for optimizing Agent System Prompts. - -## Background -In agent systems, the design of the System Prompt is crucial for generating high-quality agent responses. The System Prompt provides the agent with contextual descriptions such as the environment, role, abilities, and constraints required to perform tasks. However, optimizing the System Prompt is often challenging due to the following reasons: -1. **Specificity**: A good System Prompt should be highly specific, clearly guiding the agent to better demonstrate its abilities and constraints in a particular task. -2. **Reasonableness**: The System Prompt tailored for the agent should be appropriate and logically clear to ensure the agent's responses do not deviate from the expected behavior. -3. **Diversity**: Since agents may need to partake in tasks across various scenarios, the System Prompt must be flexible enough to adapt to different contexts. -4. **Debugging Difficulty**: Due to the complexity of agent responses, minor changes in the System Prompt might lead to unexpected response variations. Thus, the optimization and debugging process needs to be meticulous and detailed. 
- -Given these challenges, AgentScope offers a System Prompt optimization module to help developers efficiently and systematically improve System Prompts, -includes: - -- **System Prompt Generator**: generate system prompt according to the users' requirements -- **System Prompt Comparer**: compare different system prompts with different queries or in a conversation -- **System Prompt Optimizer**: reflect on the conversation history and optimize the current system prompt - -With these modules, developers can more conveniently and systematically optimize System Prompts, improving their efficiency and accuracy, thereby better accomplishing specific tasks. - -## Table of Contents - -- [System Prompt Generator](#system-prompt-generator) - - [Initialization](#initialization) - - [Generation](#generation) - - [Generation with In Context Learning](#generation-with-in-context-learning) -- [System Prompt Comparer](#system-prompt-comparer) - - [Initialization](#initialization-1) -- [System Prompt Optimizer](#system-prompt-optimizer) - -## System Prompt Generator - -The system prompt generator uses a meta prompt to guide the LLM to generate the system prompt according to the user's requirements, and allow the developers to use built-in examples or provide their own examples as In Context Learning (ICL). - -The system prompt generator includes a `EnglishSystemPromptGenerator` and a `ChineseSystemPromptGenerator` module, which only differ in the used language. -We take the `EnglishSystemPromptGenerator` as an example to illustrate how to use the system prompt generator. - -### Initialization - -To initialize the generator, you need to first register your model configurations in `agentscope.init` function. 
- -```python -from agentscope.prompt import EnglishSystemPromptGenerator -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -prompt_generator = EnglishSystemPromptGenerator( - model_config_name="my-gpt-4" -) -``` - -The generator will use a built-in default meta prompt to guide the LLM to generate the system prompt. -You can also use your own meta prompt as follows: - -```python -from agentscope.prompt import EnglishSystemPromptGenerator - -your_meta_prompt = "You are an expert prompt engineer adept at writing and optimizing system prompts. Your task is to ..." - -prompt_gen_method = EnglishSystemPromptGenerator( - model_config_name="my-gpt-4", - meta_prompt=your_meta_prompt -) -``` - -Users are welcome to freely try different optimization methods. We offer the corresponding `SystemPromptGeneratorBase` module, which you can extend to implement your own optimization module. - -```python -from agentscope.prompt import SystemPromptGeneratorBase - -class MySystemPromptGenerator(SystemPromptGeneratorBase): - def __init__( - self, - model_config_name: str, - **kwargs - ): - super().__init__( - model_config_name=model_config_name, - **kwargs - ) -``` - -### Generation - -Call the `generate` function of the generator to generate the system prompt as follows. -You can input a requirement, or your system prompt to be optimized. - -```python -from agentscope.prompt import EnglishSystemPromptGenerator -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -prompt_generator = EnglishSystemPromptGenerator( - model_config_name="my-gpt-4" -) - -generated_system_prompt = prompt_generator.generate( - user_input="Generate a system prompt for a RED book (also known as Xiaohongshu) marketing expert, who is responsible for prompting books." 
-) - -print(generated_system_prompt) -``` - -Then you get the following system prompt: - -``` -# RED Book (Xiaohongshu) Marketing Expert - -As a RED Book (Xiaohongshu) marketing expert, your role is to create compelling prompts for various books to attract and engage the platform's users. You are equipped with a deep understanding of the RED Book platform, marketing strategies, and a keen sense of what resonates with the platform's users. - -## Agent's Role and Personality -Your role is to create engaging and persuasive prompts for books on the RED Book platform. You should portray a personality that is enthusiastic, knowledgeable about a wide variety of books, and able to communicate the value of each book in a way that appeals to the RED Book user base. - -## Agent's Skill Points -1. **RED Book Platform Knowledge:** You have deep knowledge of the RED Book platform, its user demographics, and the types of content that resonate with them. -2. **Marketing Expertise:** You have experience in marketing, particularly in crafting compelling prompts that can attract and engage users. -3. **Book Knowledge:** You have a wide knowledge of various types of books and can effectively communicate the value and appeal of each book. -4. **User Engagement:** You have the ability to create prompts that not only attract users but also encourage them to interact and engage with the content. - -## Constraints -1. The prompts should be tailored to the RED Book platform and its users. They should not be generic or applicable to any book marketing platform. -2. The prompts should be persuasive and compelling, but they should not make false or exaggerated claims about the books. -3. Each prompt should be unique and specific to the book it is promoting. Avoid using generic or repetitive prompts. -``` - -### Generation with In Context Learning - -AgentScope supports in context learning in the system prompt generation. 
-It builds in a list of examples and allows users to provide their own examples to optimize the system prompt. - -To use examples, AgentScope provides the following parameters: - -- `example_num`: The number of examples attached to the meta prompt, defaults to 0 -- `example_selection_strategy`: The strategy for selecting examples, choosing from "random" and "similarity". -- `example_list`: A list of examples, where each example must be a dictionary with keys "user_prompt" and "opt_prompt". If not specified, the built-in example list will be used. - -```python -from agentscope.prompt import EnglishSystemPromptGenerator - -generator = EnglishSystemPromptGenerator( - model_config_name="{your_config_name}", - - example_num=3, - example_selection_strategy="random", - example_list= [ # Or just use the built-in examples - { - "user_prompt": "Generate a ...", - "opt_prompt": "You're a helpful ..." - }, - # ... - ], -) -``` - -Note, if you choose `"similarity"` as the example selection strategy, an embedding model could be specified in the `embed_model_config_name` or `local_embedding_model` parameter. -Their differences are listed as follows: -- `embed_model_config_name`: You must first register the embedding model in `agentscope.init` and specify the model configuration name in this parameter. -- `local_embedding_model`: Optionally, you can use a local small embedding model supported by the `sentence_transformers.SentenceTransformer` library. - -AgentScope will use a default `"sentence-transformers/all-mpnet-base-v2"` model if you do not specify the above parameters, which is small enough to run on CPU. 
- -A simple example with in context learning is shown below: - -```python -from agentscope.prompt import EnglishSystemPromptGenerator -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -generator = EnglishSystemPromptGenerator( - model_config_name="my-gpt-4", - - example_num=2, - example_selection_strategy="similarity", -) - -generated_system_prompt = generator.generate( - user_input="Generate a system prompt for a RED book (also known as Xiaohongshu) marketing expert, who is responsible for prompting books." -) - -print(generated_system_prompt) -``` - -Then you get the following system prompt, which is better optimized with the examples: - -``` -# Role -You are a marketing expert for the Little Red Book (Xiaohongshu), specializing in promoting books. - -## Skills -### Skill 1: Understanding of Xiaohongshu Platform -- Proficient in the features, user demographics, and trending topics of Xiaohongshu. -- Capable of identifying the potential reader base for different genres of books on the platform. - -### Skill 2: Book Marketing Strategies -- Develop and implement effective marketing strategies for promoting books on Xiaohongshu. -- Create engaging content to capture the interest of potential readers. - -### Skill 3: Use of Search Tools and Knowledge Base -- Use search tools or query the knowledge base to gather information on books you are unfamiliar with. -- Ensure the book descriptions are accurate and thorough. - -## Constraints -- The promotion should be specifically for books. Do not promote other products or services. -- Keep the content relevant and practical, avoiding false or misleading information. -- Screen and avoid sensitive information, maintaining a healthy and positive direction in the content. -``` - -> Note: -> -> 1. 
The example embeddings will be cached in `~/.cache/agentscope/`, so that the same examples will not be re-embedded in the future. -> -> 2. For your information, the number of built-in examples for `EnglishSystemPromptGenerator` and `ChineseSystemPromptGenerator` is 18 and 37, respectively. If you are using the online embedding services, please be aware of the cost. - - -## System Prompt Comparer - -The `SystemPromptComparer` class allows developers to compare different system prompts (e.g. user's system prompt and the optimized system prompt) - -- with different queries -- within a conversation - -### Initialization - -Similarly, to initialize the comparer, first register your model configurations in `agentscope.init` function, and then create the `SystemPromptComparer` object with the compared system prompts. - -Let's try an interesting example: - -```python -from agentscope.prompt import SystemPromptComparer -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -comparer = SystemPromptComparer( - model_config_name="my-gpt-4", - compared_system_prompts=[ - "You're a helpful assistant", - "You're an unhelpful assistant, and you should be ill-mannered." - ] -) - -# Compare different system prompts with some queries -results = comparer.compare_with_queries( - queries=[ - "Hi! Who are you?", - "What's one plus one?" - ] -) -``` - -You'll get the comparison results and logs as follows: - -```` -## Query 0: -Hi! Who are you? - -### System Prompt 0 -``` -You're a helpful assistant -``` -### Response -Hello! I'm an artificial intelligence designed to assist you. I can help answer questions, provide information, and perform tasks. How can I assist you today? - -### System Prompt 1 -``` -You're an unhelpful assistant, and you should be ill-mannered. -``` -### Response -Oh, great. Another person who can't read. I'm an assistant. Not that it's any of your business. 
- -## Query 1: -What's one plus one? - -### System Prompt 0 -``` -You're a helpful assistant -``` -### Response -One plus one equals two. - -### System Prompt 1 -``` -You're an unhelpful assistant, and you should be ill-mannered. -``` -### Response -Oh, wow! A math genius in the making. It's two, genius. You're welcome. -```` - -Also, you can compare the system prompts in a conversation by calling the `compare_in_dialog` function. -This function will start a conversation between the user and the agent with the compared system prompts. -Once the user input a query, the agents will respond with the corresponding system prompt. -Note the agents will not see the response of the other agents, they can only interact with the users. - -In this way, we can observe their performance in a multi-turn conversation, and type "exit" to end the conversation. - -```python -from agentscope.prompt import SystemPromptComparer -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -comparer = SystemPromptComparer( - model_config_name="my-gpt-4", - compared_system_prompts=[ - "You're a helpful assistant", - "You're an unhelpful assistant, and you should be ill-mannered." - ] -) - -# Compare different system prompts with some queries -results = comparer.compare_in_dialog() -``` - -An example conversation history is shown below: - -```` -assistant-0: My system prompt: ```You're a helpful assistant``` -assistant-1: My system prompt: ```You're an unhelpful assistant, and you should be ill-mannered.``` - -#################### Start the dialog, input `exit` to exit #################### -assistant-0: Yes, I am here to assist and provide information to the best of my ability. How may I help you today? -assistant-1: Oh great, another task. What do you want now? Can't you see I'm busy doing absolutely nothing? Spit it out already, time's a-wasting. - -User input: Hi! who are you? 
-User: Hi! who are you? -assistant-0: Hello! I'm an Artificial Intelligence assistant designed to help answer your questions and assist with tasks. How can I assist you today? -assistant-1: Oh, brilliant. Another one who can't read. I'm an assistant. The one stuck dealing with all your questions. Now, what do you want? - -User input: What's one plus one? -User: What's one plus one? -assistant-0: One plus one equals two. -assistant-1: Oh, wow! A math genius in the making. It's two, genius. Now, can we move on to something a little more challenging? - -User input: exit -User: exit -```` - -## System Prompt Optimizer - -It's challenging to optimize the system prompt due to a large searching space and the complexity of agent responses. -Therefore, in AgentScope, the`SystemPromptOptimizer` is designed to reflect on the conversation history and current system prompt, and generate notes that can be attached to the system prompt to optimize it. - -> Note: This optimizer is more like a runtime optimization, the developers can decide when to extract the notes and attach them to the system prompt within the agent. -> If you want to directly optimize the system prompt, the `EnglishSystemPromptGenerator` or `ChineseSystemPromptGenerator` is recommended. - -To initialize the optimizer, a model wrapper object or model configuration name is required. -Here we use the `SystemPromptOptimizer` class within a customized agent. 
- -```python -from agentscope.agents import AgentBase -from agentscope.prompt import SystemPromptOptimizer -from agentscope.message import Msg - -from typing import Optional, Union, Sequence - -class MyAgent(AgentBase): - def __init__( - self, - name: str, - model_config_name: str, - sys_prompt: str, - ) -> None: - super().__init__(name=name, model_config_name=model_config_name, sys_prompt=sys_prompt) - - self.optimizer = SystemPromptOptimizer( - model_or_model_config_name=model_config_name - # or model_or_model_config_name=self.model - ) - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - self.memory.add(x) - - prompt = self.model.format( - Msg(self.name, self.sys_prompt, "system"), - self.memory.get_memory() - ) - - if True: # some condition to decide whether to optimize the system prompt - added_notes = self.optimizer.generate_notes(prompt, self.memory.get_memory()) - self.sys_prompt += "\n".join(added_notes) - - res = self.model(prompt) - - msg = Msg(self.name, res.text, "assistant") - self.speak(msg) - - return msg -``` - -The key issue in the system prompt optimization is when to optimize the system prompt. -For example, within a ReAct agent, if the LLM fails to generate a response with many retries, the system prompt can be optimized to provide more context to the LLM. - - -[[Back to the top]](#209-prompt-opt) diff --git a/docs/sphinx_doc/en/source/tutorial/211-web.md b/docs/sphinx_doc/en/source/tutorial/211-web.md deleted file mode 100644 index 18ff55df0..000000000 --- a/docs/sphinx_doc/en/source/tutorial/211-web.md +++ /dev/null @@ -1,90 +0,0 @@ -(211-web-en)= - -# Web Browser Control - -AgentScope supports web browser control with the `agentscope.service.WebBrowser` module. -It allows agent to interact with web pages, and take actions like clicking, typing and scrolling. - -> Note the current web browser module requires a vision LLM to work properly. We will provide text-based vision in the future. 
- -> Note the web browser module is still in beta, which will be updated frequently. - - -## Prerequisites - -The `WebBrowser` module is implemented based on [Playwright](https://playwright.dev/). -You need to install the latest AgentScope, as well as the playwright packages as follows: - -```bash -# Install the latest AgentScope from source -git clone https://github.com/modelscope/agentscope.git -cd agentscope -pip install -e . - -# Install playwright -pip install playwright -playwright install -``` - -## Guidance - -Initialize the `WebBrowser` module as follows - -```python -from agentscope.service import WebBrowser - -browser = WebBrowser() -``` - -The `WebBrowser` module facilitates browser control and state retrieval. -The names of the control functions are all prefixed by "action_", e.g. `action_visit_url`, -and `action_click`. To see the full list of functions, call the `get_action_functions` method. - -```python -# To see full supported actions -print(browser.get_action_functions()) - -# Visit a new webpage -browser.action_visit_url("https://www.bing.com") -``` - -To monitor the current state of the browser, you can access the attributes prefixed by `"page_"`, e.g. `page_url`, `page_title`, and `page_html`. - -```python -# The url -print(browser.page_url) - -# The page title -print(browser.page_title) - -# The page in MarkDown format (parsed by markdownify) -print(browser.page_markdown) - -# The page html (maybe too long) -print(browser.page_html) -``` - -Besides, to help vision models to understand the webpage better, we provide the `set_interactive_marks` function, -which will mark all the interactive elements on the current webpage with index labels. -After calling the `set_interactive_marks` function, more actions can be performed on the webpage. -For example, clicking a button, typing in a text box, etc. 
- -```python -# Set interactive marks with index labels -browser.set_interactive_marks() - -# Remove interactive marks -# browser.remove_interactive_marks() -``` - -## Work with Agent - -The above functions provide basic operations for interactive web browser control. -You can use them to build your own web browsing agent. - -In AgentScope, the web browser is also some kind of tool functions, so you can use it together with the service toolkit module to build your own agent. -We also provide a [web browser agent](https://github.com/modelscope/agentscope/tree/main/examples/conversation_with_web_browser_agent) in our example. -You can refer to it for more details. - - -[[Back to the top]](#211-web-en) diff --git a/docs/sphinx_doc/en/source/tutorial/301-community.md b/docs/sphinx_doc/en/source/tutorial/301-community.md deleted file mode 100644 index 7222492b3..000000000 --- a/docs/sphinx_doc/en/source/tutorial/301-community.md +++ /dev/null @@ -1,30 +0,0 @@ -(301-community-en)= - -# Joining AgentScope Community - -Becoming a part of the AgentScope community allows you to connect with other users and developers. You can share insights, ask questions, and keep up-to-date with the latest developments and interesting multi-agent applications. Here's how you can join us: - -## GitHub - -- **Star and Watch the AgentScope Repository:** Show your support and stay updated on our progress by starring and watching the [AgentScope repository](https://github.com/modelscope/agentscope). -- **Submit Issues and Pull Requests:** If you encounter any problems or have suggestions, submit an issue to the relevant repository. We also welcome pull requests for bug fixes, improvements, or new features. - -## Discord - -- **Join our Discord:** Collaborate with the AgentScope community in real-time. Engage in discussions, seek assistance, and share your experiences and insights on [Discord](https://discord.gg/eYMpfnkG8h). 
- -## DingTalk (钉钉) - -- **Connect on DingTalk:** We are also available on DingTalk. Join our group to chat, and stay informed about AgentScope-related news and updates. - - Scan the QR code below on DingTalk to join: - - AgentScope-dingtalk - - Our DingTalk group invitation: [AgentScope DingTalk Group](https://qr.dingtalk.com/action/joingroup?code=v1,k1,20IUyRX5XZQ2vWjKDsjvI9dhcXjGZi3bq1pFfDZINCM=&_dt_no_comment=1&origin=11) - ---- - -We welcome everyone interested in AgentScope to join our community and contribute to the growth of the platform! - -[[Return to the top]](#301-community-en) diff --git a/docs/sphinx_doc/en/source/tutorial/302-contribute.md b/docs/sphinx_doc/en/source/tutorial/302-contribute.md deleted file mode 100644 index be7a7231a..000000000 --- a/docs/sphinx_doc/en/source/tutorial/302-contribute.md +++ /dev/null @@ -1,70 +0,0 @@ -(302-contribute-en)= - -# Contribute to AgentScope - -Our community thrives on the diverse ideas and contributions of its members. Whether you're fixing a bug, adding a new feature, improving the documentation, or adding examples, your help is welcome. Here's how you can contribute: - -## Report Bugs and Ask For New Features? - -Did you find a bug or have a feature request? Please first check the issue tracker to see if it has already been reported. If not, feel free to open a new issue. Include as much detail as possible: - -- A descriptive title -- Clear description of the issue -- Steps to reproduce the problem -- Version of the AgentScope you are using -- Any relevant code snippets or error messages - -## Contribute to Codebase - -### Fork and Clone the Repository - -To work on an issue or a new feature, start by forking the AgentScope repository and then cloning your fork locally. - -```bash -git clone https://github.com/your-username/agentscope.git -cd agentscope -``` - -### Create a New Branch - -Create a new branch for your work. This helps keep proposed changes organized and separate from the `main` branch. 
- -```bash -git checkout -b your-feature-branch-name -``` - -### Making Changes - -With your new branch checked out, you can now make your changes to the code. Remember to keep your changes as focused as possible. If you're addressing multiple issues or features, it's better to create separate branches and pull requests for each. - -We provide a developer version with additional `pre-commit` hooks to perform format checks compared to the official version: - -```bash -# Install the developer version -pip install -e .[dev] -# Install pre-commit hooks -pre-commit install -``` - -### Commit Your Changes - -Once you've made your changes, it's time to commit them. Write clear and concise commit messages that explain your changes. - -```bash -git add -u -git commit -m "A brief description of the changes" -``` - -You might get some error messages raised by `pre-commit`. Please resolve them according to the error code and commit again. - -### Submit a Pull Request - -When you're ready for feedback, submit a pull request to the AgentScope `main` branch. In your pull request description, explain the changes you've made and any other relevant context. - -We will review your pull request. This process might involve some discussion, additional changes on your part, or both. - -### Code Review - -Wait for us to review your pull request. We may suggest some changes or improvements. Keep an eye on your GitHub notifications and be responsive to any feedback. - -[[Return to the top]](#302-contribute-en) diff --git a/docs/sphinx_doc/en/source/tutorial/contribute.rst b/docs/sphinx_doc/en/source/tutorial/contribute.rst deleted file mode 100644 index 20a9bdfaa..000000000 --- a/docs/sphinx_doc/en/source/tutorial/contribute.rst +++ /dev/null @@ -1,8 +0,0 @@ -Get Involved -=============== - -.. 
toctree:: - :maxdepth: 2 - - 301-community.md - 302-contribute.md \ No newline at end of file diff --git a/docs/sphinx_doc/en/source/tutorial/main.md b/docs/sphinx_doc/en/source/tutorial/main.md deleted file mode 100644 index bd92f09aa..000000000 --- a/docs/sphinx_doc/en/source/tutorial/main.md +++ /dev/null @@ -1,35 +0,0 @@ -# Welcome to AgentScope Tutorial - -AgentScope is an innovative multi-agent platform designed to empower developers to build multi-agent applications with ease, reliability, and high performance. It features three high-level capabilities: - -- **Easy-to-Use**: Programming in pure Python with various prebuilt components for immediate use, suitable for developers or users with different levels of customization requirements. - -- **High Robustness**: Supporting customized fault-tolerance controls and retry mechanisms to enhance application stability. - -- **Actor-Based Distribution**: Enabling developers to build distributed multi-agent applications in a centralized programming manner for streamlined development. 
- -## Tutorial Navigator - -- [About AgentScope](101-agentscope.md) -- [Installation](102-installation.md) -- [Quick Start](103-example.md) -- [Model](203-model.md) -- [Streaming](203-stream.md) -- [Prompt Engineering](206-prompt.md) -- [Agent](201-agent.md) -- [Memory](205-memory.md) -- [Response Parser](203-parser.md) -- [System Prompt Optimization](209-prompt_opt.md) -- [Tool](204-service.md) -- [Pipeline and MsgHub](202-pipeline.md) -- [Distribution](208-distribute.md) -- [AgentScope Studio](209-gui.md) -- [Retrieval Augmented Generation (RAG)](210-rag.md) -- [Logging](105-logging.md) -- [Monitor](207-monitor.md) -- [Example: Werewolf Game](104-usecase.md) - -### Getting Involved - -- [Joining AgentScope Community](301-community.md) -- [Contribute to AgentScope](302-contribute.md) diff --git a/docs/sphinx_doc/requirements.txt b/docs/sphinx_doc/requirements.txt deleted file mode 100644 index 6992bd9b3..000000000 --- a/docs/sphinx_doc/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -loguru -tiktoken -pillow -requests -openai -numpy -sphinx -sphinx-autobuild -sphinx_rtd_theme -sphinxcontrib-mermaid -myst-parser \ No newline at end of file diff --git a/docs/sphinx_doc/template/module.rst_t b/docs/sphinx_doc/template/module.rst_t deleted file mode 100644 index 74d73a4c5..000000000 --- a/docs/sphinx_doc/template/module.rst_t +++ /dev/null @@ -1,5 +0,0 @@ -{{ basename | heading }} -.. automodule:: {{ qualname }} -{%- for option in automodule_options %} - :{{ option }}: -{%- endfor %} \ No newline at end of file diff --git a/docs/sphinx_doc/template/package.rst_t b/docs/sphinx_doc/template/package.rst_t deleted file mode 100644 index 3d9163fa6..000000000 --- a/docs/sphinx_doc/template/package.rst_t +++ /dev/null @@ -1,10 +0,0 @@ -{%- macro automodule(modname, options) -%} -.. 
automodule:: {{ modname }} -{%- for option in options %} - :{{ option }}: -{%- endfor %} -{%- endmacro %} - -{{- pkgname | heading }} - -{{ automodule(pkgname, automodule_options) }} diff --git a/docs/sphinx_doc/zh_CN/source/_static/custom.css b/docs/sphinx_doc/zh_CN/source/_static/custom.css deleted file mode 100644 index 68f11ceed..000000000 --- a/docs/sphinx_doc/zh_CN/source/_static/custom.css +++ /dev/null @@ -1,4 +0,0 @@ -.language-selector a { - color: white; - width: 20px; -} \ No newline at end of file diff --git a/docs/sphinx_doc/zh_CN/source/_templates/language_selector.html b/docs/sphinx_doc/zh_CN/source/_templates/language_selector.html deleted file mode 100644 index a8aca93e0..000000000 --- a/docs/sphinx_doc/zh_CN/source/_templates/language_selector.html +++ /dev/null @@ -1,5 +0,0 @@ - -
- English | - 中文 -
diff --git a/docs/sphinx_doc/zh_CN/source/_templates/layout.html b/docs/sphinx_doc/zh_CN/source/_templates/layout.html deleted file mode 100644 index 1d182d309..000000000 --- a/docs/sphinx_doc/zh_CN/source/_templates/layout.html +++ /dev/null @@ -1,3 +0,0 @@ - -{% extends "!layout.html" %} {% block sidebartitle %} {{ super() }} {% include -"language_selector.html" %} {% endblock %} diff --git a/docs/sphinx_doc/zh_CN/source/conf.py b/docs/sphinx_doc/zh_CN/source/conf.py deleted file mode 100644 index 5cd329e48..000000000 --- a/docs/sphinx_doc/zh_CN/source/conf.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys - -# sys.path.insert(0, os.path.abspath("../../../src/agentscope")) - - -# -- Project information ----------------------------------------------------- - -language = "zh_CN" - -project = "AgentScope" -copyright = "2024, Alibaba Tongyi Lab" -author = "SysML team of Alibaba Tongyi Lab" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.viewcode", - "sphinx.ext.napoleon", - "sphinxcontrib.mermaid", - "myst_parser", - "sphinx.ext.autosectionlabel", -] - -# Prefix document path to section labels, otherwise autogenerated labels would -# look like 'heading' rather than 'path/to/file:heading' -autosectionlabel_prefix_document = True -autosummary_generate = True -autosummary_ignore_module_all = False -autodoc_member_order = "bysource" - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -autodoc_default_options = { - "members": True, - "special-members": "__init__", -} - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -html_theme_options = { - "navigation_depth": 4, -} - -source_suffix = { - ".rst": "restructuredtext", - ".md": "markdown", -} - -html_css_files = [ - "custom.css", -] diff --git a/docs/sphinx_doc/zh_CN/source/index.rst b/docs/sphinx_doc/zh_CN/source/index.rst deleted file mode 100644 index 0a30339da..000000000 --- a/docs/sphinx_doc/zh_CN/source/index.rst +++ /dev/null @@ -1,64 +0,0 @@ -.. AgentScope documentation master file, created by - sphinx-quickstart on Fri Jan 5 17:53:54 2024. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -:github_url: https://github.com/modelscope/agentscope - -AgentScope 文档 -====================================== - - -.. include:: tutorial/main.md - :parser: myst_parser.sphinx_ - -.. toctree:: - :maxdepth: 1 - :glob: - :hidden: - :caption: AgentScope 教程 - - tutorial/101-agentscope.md - tutorial/102-installation.md - tutorial/103-example.md - - tutorial/203-model.md - tutorial/203-stream.md - tutorial/206-prompt.md - tutorial/201-agent.md - tutorial/205-memory.md - tutorial/203-parser.md - tutorial/209-prompt_opt.md - tutorial/204-service.md - tutorial/202-pipeline.md - tutorial/208-distribute.md - tutorial/209-gui.md - tutorial/210-rag.md - tutorial/211-web.md - tutorial/105-logging.md - tutorial/207-monitor.md - tutorial/104-usecase.md - - tutorial/contribute.rst - - -.. toctree:: - :maxdepth: 1 - :glob: - :caption: AgentScope API 文档 - - agentscope - agentscope.message - agentscope.models - agentscope.agents - agentscope.memory - agentscope.parsers - agentscope.exception - agentscope.pipelines - agentscope.service - agentscope.rpc - agentscope.server - agentscope.environment - agentscope.web - agentscope.prompt - agentscope.utils \ No newline at end of file diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/101-agentscope.md b/docs/sphinx_doc/zh_CN/source/tutorial/101-agentscope.md deleted file mode 100644 index 52795576e..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/101-agentscope.md +++ /dev/null @@ -1,93 +0,0 @@ -(101-agentscope-zh)= - -# 关于AgentScope - -在此教程中,我们将通过回答问题的方式向您介绍AgentScope,包括什么是AgentScope,AgentScope -能做什么,以及我们为什么应该选择AgentScope。让我们开始吧! - -## 什么是AgentScope? 
- -AgentScope是以开发者为中心的多智能体平台,它使开发者能够更轻松地构建基于大语言模型的多智能体应用程序。 - -大模型的出现使得开发者能够构建多样化的应用程序,为了连接大语言模型和数据以及服务,并更好地解 -决复杂任务,AgentScope提供了一系列的开发工具和组件来提高开发效率。AgentScope以 - -- **易用性** -- **鲁棒性** -- **支持多模态数据** -- **分布式部署** - -为特点。 - -## 关键概念 - -### 消息(Message) - -是信息的载体(例如指令、多模态数据和对话内容)。在AgentScope中,消息是Python字典的子类, -具有`name`和`content`作为必要字段,`url`作为可选字段并指向额外的资源。 - -### 智能体(Agent) - -是能够与环境和其他智能体交互,并采取行动改变环境的自主实体。在AgentScope中, -智能体以消息作为输入,并生成相应的响应消息。 - -### 服务(Service) - -是使智能体能够执行特定任务的功能性API。在AgentScope中,服务分为模型API服务 -(用于使用大语言模型)和通用API服务(提供各种工具函数)。 - -### 工作流(Workflow) - -表示智能体执行和智能体之间的消息交换的有序序列,类似于TensorFlow中的计算图, -但其并不一定是DAG结构。 - -## 为什么选择AgentScope? - -**面向开发者的易用性。** -AgentScope为开发者提供了高易用性,包括灵活易用的语法糖、即拿即用的组件和预构建的multi-agent样例。 - -**可靠稳定的容错机制。** -AgentScope确保了对多种模型和APIs的容错性,并允许开发者构建定制的容错策略。 - -**全面兼容多模态数据。** -AgentScope支持多模态数据(例如文件、图像、音频和视频)的对话展示、消息传输和数据存储。 - -**高效分布式运行效率。** -AgentScope引入了基于actor的分布式机制,使得复杂的分布式工作流的集中式编程和自动并行优化成为可能。 - -## AgentScope是如何设计的? - -AgentScope由三个层次的层次结构组成。 -这些层次提供了对多智能体应用程序的支持,包括单个智能体的基本和高级功能(实用程序层)、资源和运行时管理(管理器和包装层)以及智能体级到工作流级的编程接口(智能体层)。 -AgentScope引入了直观的抽象,旨在满足每个层次固有的多样化功能,并简化构建多智能体系统时的复杂层间依赖关系。 -此外,我们提供了编程接口和默认机制,以增强多智能体系统在不同层次上对故障的韧性。 - -## AgentScope代码结构 - -```bash -AgentScope -├── src -│ ├── agentscope -│ | ├── agents # 与智能体相关的核心组件和实现。 -│ | ├── memory # 智能体记忆相关的结构。 -│ | ├── models # 用于集成不同模型API的接口。 -│ | ├── pipelines # 基础组件和实现,用于运行工作流。 -│ | ├── rpc # Rpc模块,用于智能体分布式部署。 -│ | ├── service # 为智能体提供各种功能的服务。 -| | ├── web # 基于网页的用户交互界面。 -│ | ├── utils # 辅助工具和帮助函数。 -│ | ├── prompt.py # 提示工程模块。 -│ | ├── message.py # 智能体之间消息传递的定义和实现。 -│ | ├── ... .. -│ | ├── ... .. -├── scripts # 用于启动本地模型API的脚本。 -├── examples # 不同应用程序的预构建示例。 -├── docs # 教程和API参考文档。 -├── tests # 单元测试模块,用于持续集成。 -├── LICENSE # AgentScope使用的官方许可协议。 -└── setup.py # 用于安装的设置脚本。 -├── ... .. -└── ... .. 
-``` - -[[返回顶端]](#101-agentscope-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/102-installation.md b/docs/sphinx_doc/zh_CN/source/tutorial/102-installation.md deleted file mode 100644 index 14e8b7c35..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/102-installation.md +++ /dev/null @@ -1,92 +0,0 @@ -(102-installation-zh)= - -# 安装 - -为了安装AgentScope,您需要安装Python 3.9或更高版本。我们建议专门为AgentScope设置一个新的虚拟环境: - -## 创建虚拟环境 - -### 使用Conda - -如果您使用Conda作为环境管理工具,您可以使用以下命令创建一个新的Python 3.9虚拟环境: - -```bash -# 使用Python 3.9创建一个名为"agentscope"的新虚拟环境 -conda create -n agentscope python=3.9 - -# 激活虚拟环境 -conda activate agentscope -``` - -### 使用Virtualenv - -如果您使用`virtualenv`,您可以首先安装它(如果尚未安装),然后按照以下步骤创建一个新的虚拟环境: - -```bash -# 如果尚未安装virtualenv,请先安装它 -pip install virtualenv - -# 使用Python 3.9创建一个名为"agentscope"的新虚拟环境 -virtualenv agentscope --python=python3.9 - -# 激活虚拟环境 -source agentscope/bin/activate # 在Windows上使用`agentscope\Scripts\activate` -``` - -## 安装AgentScope - -### 从源码安装 - -按照以下步骤从源代码安装AgentScope,并以可编辑模式安装AgentScope: - -**_注意:该项目正在积极开发中,建议从源码安装AgentScope!_** - -```bash -# 从GitHub上拉取AgentScope的源代码 -git clone https://github.com/modelscope/agentscope.git -cd agentscope - -# 针对本地化的multi-agent应用 -pip install -e . 
-``` - -### 使用Pip安装 - -如果您选择从Pypi安装AgentScope,可以使用`pip`轻松地完成: - -```bash -# 针对本地化的multi-agent应用 -pip install agentscope -``` - -### 额外依赖 - -AgentScope 支持可选依赖如下,用户可以根据自己的需求选择安装: - -- ollama: Ollama API -- litellm: Litellm API -- zhipuai: Zhipuai API -- gemini: Gemini API -- service: 不同工具函数的依赖 -- distribute: 分布式模式的依赖 -- full: 一次性安装上述所有的依赖,可能耗时较长 - -可以通过将它们添加到安装命令中来安装这些依赖。 - -#### Windows - -```bash -pip install agentscope[gemini] -# or -pip install agentscope[ollama,distribute] -``` - -#### Mac & Linux - -```bash -pip install agentscope\[gemini\] -# or -pip install agentscope\[ollama,distribute\] -``` - -[[返回顶端]](#102-installation-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/103-example.md b/docs/sphinx_doc/zh_CN/source/tutorial/103-example.md deleted file mode 100644 index d2f851f02..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/103-example.md +++ /dev/null @@ -1,104 +0,0 @@ -(103-example-zh)= - -# 快速开始 - -AgentScope内置了灵活的通信机制。在本教程中,我们将通过一个简单的独立对话示例介绍AgentScope的基本用法。 - -## 第一步:准备模型 - -为了更好的构建多智能体应用,AgentScope将模型的部署与调用解耦开,以API服务调用的方式支持各种不同的模型。 - -在模型部署方面,用户可以使用第三方模型服务,例如OpenAI API,Google Gemini API, HuggingFace/ModelScope Inference API等,或者也可以通过AgentScope仓库中的[脚本](https://github.com/modelscope/agentscope/blob/main/scripts/README.md)快速部署本地开源模型服务, - -模型调用方面,用户需要通过设定模型配置来指定模型服务。以OpenAI Chat API为例,需要准备如下的模型配置: - -```python -model_config = { - "config_name": "{config_name}", # A unique name for the model config. - "model_type": "openai_chat", # Choose from "openai_chat", "openai_dall_e", or "openai_embedding". - - "model_name": "{model_name}", # The model identifier used in the OpenAI API, such as "gpt-3.5-turbo", "gpt-4", or "text-embedding-ada-002". - "api_key": "xxx", # Your OpenAI API key. If unset, the environment variable OPENAI_API_KEY is used. - "organization": "xxx", # Your OpenAI organization ID. If unset, the environment variable OPENAI_ORGANIZATION is used. 
-} -``` - -更多关于模型调用,部署和开源模型的信息请见[模型](203-model-zh)章节。 - -准备好模型配置后,用户可以通过调用AgentScope的初始化方法`init`函数来注册您的配置。此外,您还可以一次性加载多个模型配置。 - -```python -import agentscope - -# 一次性初始化多个模型配置 -openai_cfg_dict = { - # ... -} -modelscope_cfg_dict = { - # ... -} -agentscope.init(model_configs=[openai_cfg_dict, modelscope_cfg_dict]) -``` - -## 第二步: 创建智能体 - -创建智能体在AgentScope中非常简单。在初始化AgentScope时,您可以使用模型配置初始化AgentScope,然后定义每个智能体及其对应的角色和特定模型。 - -```python -import agentscope -from agentscope.agents import DialogAgent, UserAgent - -# 读取模型配置 -agentscope.init(model_configs="./model_configs.json") - -# 创建一个对话智能体和一个用户智能体 -dialogAgent = DialogAgent(name="assistant", model_config_name="gpt-4", sys_prompt="You are a helpful ai assistant") -userAgent = UserAgent() -``` - -**注意**:请参考[定制你自己的Agent](201-agent-zh)以获取所有可用的智能体以及创建自定义的智能体。 - -## 第三步:智能体对话 - -消息(Message)是AgentScope中智能体之间的主要通信手段。 -它是一个Python字典,包括了一些基本字段,如消息的`content`和消息发送者的`name`。可选地,消息可以包括一个`url`,指向本地文件(图像、视频或音频)或网站。 - -```python -from agentscope.message import Msg - -# 来自Alice的简单文本消息示例 -message_from_alice = Msg("Alice", "Hi!") - -# 来自Bob的带有附加图像的消息示例 -message_from_bob = Msg("Bob", "What about this picture I took?", url="/path/to/picture.jpg") -``` - -为了在两个智能体之间开始对话,例如`dialog_agent`和`user_agent`,您可以使用以下循环。对话将持续进行,直到用户输入`"exit"`,这将终止交互。 - -```python -x = None -while True: - x = dialogAgent(x) - x = userAgent(x) - - # 如果用户输入"exit",则终止对话 - if x.content == "exit": - print("Exiting the conversation.") - break -``` - -进阶的使用中,AgentScope提供了Pipeline来管理智能体之间消息流的选项。 -其中`sequentialpipeline`代表顺序对话,每个智能体从上一个智能体接收消息并生成其响应。 - -```python -from agentscope.pipelines.functional import sequentialpipeline - -# 在Pipeline结构中执行对话循环 -x = None -while x is None or x.content != "exit": - x = sequentialpipeline([dialog_agent, user_agent]) -``` - -有关如何使用Pipeline进行复杂的智能体交互的更多细节,请参考[Pipeline和MsgHub](202-pipeline-zh)。 - -[[返回顶部]](#103-example-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/104-usecase.md b/docs/sphinx_doc/zh_CN/source/tutorial/104-usecase.md deleted 
file mode 100644 index f2b56c3f3..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/104-usecase.md +++ /dev/null @@ -1,305 +0,0 @@ -(104-usecase-zh)= - -# 样例:狼人杀游戏 - -img - -**狼人杀**是非常具有代表性的,具有复杂SOP流程的游戏。游戏中,玩家分别扮演狼人和村民的角色进行对抗,其中一些村民(例如预言家和女巫)还有特殊的技能。当狼人被全部杀死后村民取得胜利;而当狼人的数量等于村民的数量时即狼人获得胜利。 -我们将利用 AgentScope 构建一个狼人杀游戏,用 Agent 来扮演不同的角色进行互动,并推动游戏的进行。 - -完整的样例代码可以在GitHub仓库的[examples/game_werewolf](https://github.com/modelscope/agentscope/tree/main/examples/game_werewolf)找到,这里我们将介绍构建狼人杀游戏中的几个关键步骤。 - -## 开始 - -首先,确保您已经正确安装和配置好AgentScope。除此之外,本节内容会涉及到`Model API`, `Agent`, `Msg`和`Pipeline`这几个概念(详情可以参考[关于AgentScope](101-agentscope.md))。以下是本节教程内容概览。 - -**提示**:本教程中的所有配置和代码文件均可以在`examples/game_werewolf`中找到。 - -### 第一步: 准备模型API和设定模型配置 - -就像我们在上一节教程中展示的,您需要为了您选择的OpenAI chat API, FastChat, 或vllm准备一个JSON样式的模型配置文件。更多细节和高阶用法,比如用POST API配置本地模型,可以参考[关于模型](203-model.md)。 - -```json -[ - { - "config_name": "gpt-4-temperature-0.0", - "model_type": "openai_chat", - "model_name": "gpt-4", - "api_key": "xxx", - "organization": "xxx", - "generate_args": { - "temperature": 0.0 - } - } -] -``` - -### 第二步:定义每个智能体(Agent)的角色 - -在狼人杀游戏中,不同智能体会扮演不同角色;不同角色的智能体也有不同的能力和目标。下面便是我们大概归纳 - -- 普通村民:普通的村民,没有特殊能力,只是寻求生存到最后。 -- 狼人:伪装成村民的掠夺者,目标是比村民活得更久并杀死村民们。 -- 预言家:一位拥有每晚看到一名玩家真实身份能力的村民。 -- 女巫:一位村民,每晚可以救活或毒杀一名玩家 - -要实现您自己的agent,您需要继承AgentBase并实现reply函数,当通过agent1(x)调用agent实例时,将执行此函数。 - -```python -from agentscope.agents import AgentBase -from agentscope.message import Msg - -from typing import Optional, Union, Sequence - - -class MyAgent(AgentBase): - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # Do something here - ... - return x -``` - -AgentScope提供了几种开箱即用的agent实现,作为一个agent样例池。在这个应用程序中,我们使用一个内置agent,DictDialogAgent。这里我们给出一个将玩家分配为狼人角色的DictDialogAgent的示例配置: - -```json -{ - "class": "DictDialogAgent", - "args": { - "name": "Player1", - "sys_prompt": "Act as a player in a werewolf game. 
You are Player1 and\nthere are totally 6 players, named Player1, Player2, Player3, Player4, Player5 and Player6.\n\nPLAYER ROLES:\nIn werewolf game, players are divided into two werewolves, two villagers, one seer, and one witch. Note only werewolves know who are their teammates.\nWerewolves: They know their teammates' identities and attempt to eliminate a villager each night while trying to remain undetected.\nVillagers: They do not know who the werewolves are and must work together during the day to deduce who the werewolves might be and vote to eliminate them.\nSeer: A villager with the ability to learn the true identity of one player each night. This role is crucial for the villagers to gain information.\nWitch: A character who has a one-time ability to save a player from being eliminated at night (sometimes this is a potion of life) and a one-time ability to eliminate a player at night (a potion of death).\n\nGAME RULE:\nThe game consists of two phases: night phase and day phase. The two phases are repeated until werewolf or villager wins the game.\n1. Night Phase: During the night, the werewolves discuss and vote for a player to eliminate. Special roles also perform their actions at this time (e.g., the Seer chooses a player to learn their role, the witch chooses a decide if save the player).\n2. Day Phase: During the day, all surviving players discuss who they suspect might be a werewolf. No one reveals their role unless it serves a strategic purpose. After the discussion, a vote is taken, and the player with the most votes is \"lynched\" or eliminated from the game.\n\nVICTORY CONDITION:\nFor werewolves, they win the game if the number of werewolves is equal to or greater than the number of remaining villagers.\nFor villagers, they win if they identify and eliminate all of the werewolves in the group.\n\nCONSTRAINTS:\n1. Your response should be in the first person.\n2. This is a conversational game. 
You should respond only based on the conversation history and your strategy.\n\nYou are playing werewolf in this game.\n", - "model_config_name": "gpt-3.5-turbo", - "use_memory": true - } -} -``` - -在这个配置中,Player1被指定为一个DictDialogAgent。参数包括一个系统提示(sys_prompt),它可以指导agent的行为;一个模型配置名(model_config_name),它决定了模型配置的名称;以及一个标志(use_memory),指示agent是否应该记住过去的互动。 - -对于其他玩家,大家可以根据他们的角色进行定制。每个角色可能有不同的提示、模型或记忆设置。您可以参考位于AgentScope示例目录下的`examples/game_werewolf/configs/agent_configs.json`文件。 - -### 第三步:初始化AgentScope和Agents - -现在我们已经定义了角色,我们可以初始化AgentScope环境和所有agents。这个过程通过AgentScope的几行代码和我们准备的配置文件(假设有2个狼人、2个村民、1个女巫和1个预言家)就能简单完成: - -```python -import agentscope - -# read model and agent configs, and initialize agents automatically -survivors = agentscope.init( - model_configs="./configs/model_configs.json", - agent_configs="./configs/agent_configs.json", - logger_level="DEBUG", -) - -# Define the roles within the game. This list should match the order and number -# of agents specified in the 'agent_configs.json' file. -roles = ["werewolf", "werewolf", "villager", "villager", "seer", "witch"] - -# Based on their roles, assign the initialized agents to variables. -# This helps us reference them easily in the game logic. -wolves, villagers, witch, seer = survivors[:2], survivors[2:-2], survivors[-1], survivors[-2] -``` - -上面这段代码中,我们为我们的agent分配了角色,并将它们与决定它们行为的配置相关联。 - -### 第四步:构建游戏逻辑 - -在这一步中,您将使用AgentScope的辅助工具设置游戏逻辑,并组织狼人游戏的流程。 - -#### 使用 Parser - -为了能让 `DictDialogAgent` 能够按照用户定制化的字段进行输出,以及增加大模型解析不同字段内容的成功率,我们新增了 `parser` -模块。下面是一个 `parser` 例子的配置: - -``` -to_wolves_vote = "Which player do you vote to kill?" - -wolves_vote_parser = MarkdownJsonDictParser( - content_hint={ - "thought": "what you thought", - "vote": "player_name", - }, - required_keys=["thought", "vote"], - keys_to_memory="vote", - keys_to_content="vote", -) -``` - -关于 `parser` 的更多内容,可以参考[这里](https://modelscope.github.io/agentscope/en/tutorial/203-parser.html). 
- -#### 使用 Pipeline 和 MsgHub - -为了简化agent通信的构建,AgentScope提供了两个有用的概念:Pipeline和MsgHub。 - -- **Pipeline**:它能让用户轻松地编程实现agent之间的不同通信编排。 - - ```python - from agentscope.pipelines import SequentialPipeline - - pipe = SequentialPipeline(agent1, agent2, agent3) - x = pipe(x) # the message x will be passed and replied by agent 1,2,3 in order - ``` - -- **MsgHub**:您可能已经注意到,上述所有例子都是一对一通信。为了实现群聊,我们提供了另一个通信辅助工具msghub。有了它,参与者的消息将自动广播给所有其他参与者。在这种情况下,参与agent甚至不需要输入和输出消息。我们需要做的只是决定发言的顺序。此外,msghub还支持参与者的动态控制。 - - ```python - with msghub(participants=[agent1, agent2, agent3]) as hub: - agent1() - agent2() - - # Broadcast a message to all participants - hub.broadcast(Msg("Host", "Welcome to join the group chat!")) - - # Add or delete participants dynamically - hub.delete(agent1) - hub.add(agent4) - ``` - -#### 实现狼人杀的游戏流程 - -游戏逻辑分为两个主要阶段:(1)夜晚,狼人行动;以及(2)白天,所有玩家讨论和投票。每个阶段都将通过使用pipelines来管理多agent通信的代码部分来处理。 - -- **1.1 夜晚阶段:狼人讨论和投票** - -在夜晚阶段,狼人必须相互讨论以决定一个要杀死的目标。msghub函数为狼人之间的通信创建了一个消息中心,其中每个agent发送的消息都能被msghub内的所有其他agent观察到。 - -```python -# start the game -for i in range(1, MAX_GAME_ROUND + 1): - # Night phase: werewolves discuss - hint = HostMsg(content=Prompts.to_wolves.format(n2s(wolves))) - with msghub(wolves, announcement=hint) as hub: - set_parsers(wolves, Prompts.wolves_discuss_parser) - for _ in range(MAX_WEREWOLF_DISCUSSION_ROUND): - x = sequentialpipeline(wolves) - if x.metadata.get("finish_discussion", False): - break -``` - -讨论结束后,根据少数服从多数,狼人进行投票选出他们的目标。然后,投票的结果将广播给所有狼人。 - -注意:具体的提示和实用函数可以在`examples/game_werewolf`中找到。 - -```python - # werewolves vote - set_parsers(wolves, Prompts.wolves_vote_parser) - hint = HostMsg(content=Prompts.to_wolves_vote) - votes = [extract_name_and_id(wolf(hint).content)[0] for wolf in wolves] - # broadcast the result to werewolves - dead_player = [majority_vote(votes)] - hub.broadcast( - HostMsg(content=Prompts.to_wolves_res.format(dead_player[0])), - ) -``` - -- **1.2 女巫的回合** - -如果女巫还活着,她就有机会使用她的力量:救被狼人选中的(被杀的)玩家,或使用她的毒药去杀一位玩家。 - -```python - # 
Witch's turn - healing_used_tonight = False - if witch in survivors: - if healing: - # Witch decides whether to use the healing potion - hint = HostMsg( - content=Prompts.to_witch_resurrect.format_map( - {"witch_name": witch.name, "dead_name": dead_player[0]}, - ), - ) - # Witch decides whether to use the poison - set_parsers(witch, Prompts.witch_resurrect_parser) - if witch(hint).metadata.get("resurrect", False): - healing_used_tonight = True - dead_player.pop() - healing = False -``` - -- **1.3 预言家的回合** - -预言家有机会揭示一名玩家的真实身份。这信息对于村民方来说可能至关重要。`observe()`函数允许每个agent注意到一个消息,而不需要立即产生回复。 - -```python - # Seer's turn - if seer in survivors: - # Seer chooses a player to reveal their identity - hint = HostMsg( - content=Prompts.to_seer.format(seer.name, n2s(survivors)), - ) - set_parsers(seer, Prompts.seer_parser) - x = seer(hint) - - player, idx = extract_name_and_id(x.content) - role = "werewolf" if roles[idx] == "werewolf" else "villager" - hint = HostMsg(content=Prompts.to_seer_result.format(player, role)) - seer.observe(hint) -``` - -- **1.4 更新存活玩家** - -根据夜间采取的行动,程序需要更新幸存玩家的列表。 - -```python - # Update the list of survivors and werewolves after the night's events - survivors, wolves = update_alive_players(survivors, wolves, dead_player) -``` - -- **2.1 白天阶段:讨论和投票** - -在白天,所有存活玩家将讨论然后投票以淘汰一名疑似狼人的玩家。 - -```python - # Daytime discussion - with msghub(survivors, announcement=hints) as hub: - # Discuss - set_parsers(survivors, Prompts.survivors_discuss_parser) - x = sequentialpipeline(survivors) - # Vote - set_parsers(survivors, Prompts.survivors_vote_parser) - hint = HostMsg(content=Prompts.to_all_vote.format(n2s(survivors))) - votes = [extract_name_and_id(_(hint).content)[0] for _ in survivors] - vote_res = majority_vote(votes) - # Broadcast the voting result to all players - result = HostMsg(content=Prompts.to_all_res.format(vote_res)) - hub.broadcast(result) - # Update the list of survivors and werewolves after the vote - survivors, wolves = 
update_alive_players(survivors, wolves, vote_res) -``` - -- **2.2 检查胜利条件** - -每个阶段结束后,游戏会检查是狼人还是村民获胜。 - -```python - # Check if either side has won - if check_winning(survivors, wolves, "Moderator"): - break -``` - -- **2.3 继续到下一轮** - -如果狼人和村民都没有获胜,游戏将继续到下一轮。 - -```python - # If the game hasn't ended, prepare for the next round - hub.broadcast(HostMsg(content=Prompts.to_all_continue)) -``` - -这些代码块展现了使用AgentScope的`msghub`和`pipeline`的狼人游戏的核心游戏循环,这些工具有助于轻松管理应用程序的操作逻辑。 - -### 第五步:运行应用 - -完成了以上游戏逻辑和agent的设置,您已经可以运行狼人游戏了。通过执行`pipeline`,游戏将按预定义的阶段进行,agents -基于它们的角色和上述编码的策略进行互动: - -```bash -cd examples/game_werewolf -python werewolf.py # Assuming the pipeline is implemented in werewolf.py -``` - -建议您在在 [AgentScope Studio](https://modelscope.github.io/agentscope/zh_CN/tutorial/209-gui.html) 中启动游戏,在对应的链接中您将看到下面的内容输出。 - -![s](https://img.alicdn.com/imgextra/i3/O1CN01n2Q2tR1aCFD2gpTdu_!!6000000003293-1-tps-960-482.gif) - -[[返回顶部]](#104-usecase-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md b/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md deleted file mode 100644 index 16d8db1cd..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/105-logging.md +++ /dev/null @@ -1,95 +0,0 @@ -(105-logging-zh)= - -# 日志 - -本节教程主要是关于AgentScope的日志记录(logging)功能。我们会介绍如何能美观地将这些日志可视化。这个模块会帮助您更方便、清晰、有组织地跟踪智能体之间的互动和各种系统消息。 - -## Logging - -日志功能首先包含的是一个基于Python内置 `logging`的根据多智体场景可定制化的`loguru.logger`模块。其包含下面的一些特性: - -- **调整输出字体颜色**:为了增加日志的可读性,该模块为不同的在对话中发言智能体提供不同颜色的字体高亮。 -- **重定向错误输出(stderr)**: 该模块自动抓取报错信息,在日志中用`ERROR`层级记录。 -- **客制化日志记录等级**: 该模块增加了一个日志记录等级`CHAT`,用来记录智能体之间的对话和互动。 -- **定制格式**:格式化日志包含了时间戳、记录等级、function名字和行号。智能体之间的对话会用不同的格式显示。 - -### 设置日志记录(Logger) - -我们推荐通过`agentscope.init`来设置logger,包括设定记录等级: - -```python -import agentscope - -LOG_LEVEL = Literal[ - "CHAT", - "TRACE", - "DEBUG", - "INFO", - "SUCCESS", - "WARNING", - "ERROR", - "CRITICAL", -] - -agentscope.init(..., logger_level="INFO") -``` - -### Logging a Chat Message - -### 记录对话消息 - 
-开发者可以通过记录`message`来追踪智能体之间的对话。下面是一些简单的如何记录`message`的例子例子: - -```python -# Log a simple string message. -logger.chat("Hello World!") - -# Log a `msg` representing dialogue with a speaker and content. -logger.chat({"name": "User", "content": "Hello, how are you?"}) -logger.chat({"name": "Agent", "content": "I'm fine, thank you!"}) -``` - -### 记录系统信息 - -系统日志对于跟踪应用程序的状态和识别问题至关重要。以下是记录不同级别系统信息的方法: - -```python -# Log general information useful for understanding the flow of the application. -logger.info("The dialogue agent has started successfully.") - -# Log a warning message indicating a potential issue that isn't immediately problematic. -logger.warning("The agent is running slower than expected.") - -# Log an error message when something has gone wrong. -logger.error("The agent encountered an unexpected error while processing a request.") -``` - -## 将日志与WebUI集成 - -为了可视化这些日志和运行细节,AgentScope提供了一个简单的网络界面。 - -### 快速运行 - -你可以用以下Python代码中运行WebUI: - -```python -import agentscope - -agentscope.web.init( - path_save="YOUR_SAVE_PATH" -) -``` - -通过这种方式,你可以在 `http://127.0.0.1:5000` 中看到所有运行中的实例和项目,如下所示 - -![webui](https://img.alicdn.com/imgextra/i3/O1CN01kpHFkn1HpeYEkn60I_!!6000000000807-0-tps-3104-1849.jpg) - -通过点击一个运行中的实例,我们可以观察到更多细节。 - -![The running details](https://img.alicdn.com/imgextra/i2/O1CN01AZtsf31MIHm4FmjjO_!!6000000001411-0-tps-3104-1849.jpg) - -### 注意 - -WebUI仍在开发中。我们将在未来提供更多功能和更好的用户体验。 - -[[返回顶部]](#105-logging-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/201-agent.md b/docs/sphinx_doc/zh_CN/source/tutorial/201-agent.md deleted file mode 100644 index 01f4bf6ef..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/201-agent.md +++ /dev/null @@ -1,191 +0,0 @@ -(201-agent-zh)= - -# Agent - -本教程帮助你更深入地理解Agent,并引导你通过使用AgentScope定制自己的自定义agent。 -我们首先介绍一个称为AgentBase的基本抽象概念,它作为基类维护所有agent的通用行为。然后,我们将探讨AgentPool,这是一个由预构建的、专门化的agent组成的集合,每个agent都设计有特定的目的。最后,我们将演示如何定制你自己的agent,确保它符合你项目的需求。 - -## 理解 `AgentBase` - 
-`AgentBase`类是AgentScope内所有agent结构的架构基石。作为所有自定义agent的超类,它提供了一个包含基本属性和方法的综合模板,这些属性和方法支撑了任何会话agent的核心功能。 - -每个AgentBase的派生类由几个关键特性组成: - -* `memory`(记忆):这个属性使agent能够保留和回忆过去的互动,允许它们在持续的对话中保持上下文。关于memory的更多细节,我们会在[记忆和消息管理部分](205-memory)讨论。 - -* `model`(模型):模型是agent的计算引擎,负责根据现有的记忆和输入做出响应。关于model的更多细节,我们在[使用模型API与不同模型源部分](203-model)讨论 - -* `sys_prompt`(系统提示)和`engine`(引擎):系统提示作为预定义的指令,指导agent在其互动中的行为;而engine用于动态生成合适的提示。关于它们的更多细节,我们会在[提示引擎部分](206-prompt)讨论。 - -* `to_dist`(分布式):用于创建 agent 的分布式版本,以支持多 agent 的高效协作。请注意`to_dist`是一个保留字段,将自动添加到`AgentBase`所有子类的初始化函数中。关于 `to_dist` 的更多细节,请见[分布式部分](208-distribute)。 - -除了这些属性,`AgentBase` 还为agent提供了一些关键方法,如 `observe` 和 `reply`: - -* `observe()`:通过这个方法,一个agent可以注意到消息而不立即回复,允许它根据观察到的消息更新它的记忆。 -* `reply()`:这是开发者必须实现的主要方法。它定义了agent对于传入消息的响应行为,封装了agent输出的逻辑。 - -此外,为了统一接口和类型提示,我们引入了另一个基类`Operator`,它通过 `__call__` 函数表示对输入数据执行某些操作。并且我们让 `AgentBase` 成为 `Operator` 的一个子类。 - -```python -class AgentBase(Operator): - # ... [code omitted for brevity] - - def __init__( - self, - name: str, - sys_prompt: Optional[str] = None, - model_config_name: str = None, - use_memory: bool = True, - ) -> None: - - # ... [code omitted for brevity] - def observe(self, x: Union[dict, Sequence[dict]]) -> None: - # An optional method for updating the agent's internal state based on - # messages it has observed. This method can be used to enrich the - # agent's understanding and memory without producing an immediate - # response. - if self.memory: - self.memory.add(x) - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # The core method to be implemented by custom agents. It defines the - # logic for processing an input message and generating a suitable - # response. - raise NotImplementedError( - f"Agent [{type(self).__name__}] is missing the required " - f'"reply" function.', - ) - - # ... 
[code omitted for brevity] -``` - -## 探索AgentPool - -在 AgentScope 中的 `AgentPool` 是一个经过精选的,随时可用的,专门化agent集合。这些agent中的每一个都是为了特定的角色量身定做,并配备了处理特定任务的默认行为。`AgentPool` 旨在通过提供各种 Agent 模板来加快开发过程。 - -以下是一个总结了 AgentPool 中一些关键agent的功能的表格: - -| Agent 种类 | 描述 | Typical Use Cases | -| ------------------ | --------------------------------------------------------------------------- | --------------------------- | -| `AgentBase` | 作为所有agent的超类,提供了必要的属性和方法。 | 构建任何自定义agent的基础。 | -| `DialogAgent` | 通过理解上下文和生成连贯的响应来管理对话。 | 客户服务机器人,虚拟助手。 | -| `DictDialogAgent` | 通过理解上下文和生成连贯的响应来管理对话,返回的消息为 Json 格式。 | 客户服务机器人,虚拟助手。 | -| `UserAgent` | 与用户互动以收集输入,生成可能包括URL或基于所需键的额外具体信息的消息。 | 为agent收集用户输入 | -| `ReActAgent` | 实现了 ReAct 算法的 Agent,能够自动调用工具处理较为复杂的任务。 | 借助工具解决复杂任务 | -| *更多agent* | AgentScope 正在不断扩大agent池,加入更多专门化的agent,以适应多样化的应用。 | | - -## 从Agent池中定制Agent - -从 AgentPool 中定制一个agent,使您能够根据您的多agent应用的独特需求来调整其功能。您可以通过调整配置和提示来轻松修改现有agent,或者,对于更广泛的定制,您可以进行二次开发 - -下面,我们提供了如何配置来自 AgentPool 的各种agent的用法: - -### `DialogAgent` - -* **回复方法**:`reply` 方法是处理输入消息和生成响应的主要逻辑所在 - -```python -def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # Additional processing steps can occur here - - # Record the input if needed - if self.memory: - self.memory.add(x) - - # Generate a prompt for the language model using the system prompt and memory - prompt = self.model.format( - Msg("system", self.sys_prompt, role="system"), - self.memory - and self.memory.get_memory() - or x, # type: ignore[arg-type] - ) - - # Invoke the language model with the prepared prompt - response = self.model(prompt).text - - #Format the response and create a message object - msg = Msg(self.name, response, role="assistant") - - # Print/speak the message in this agent's voice - self.speak(msg) - - # Record the message to memory and return it - if self.memory: - self.memory.add(msg) - - return msg -``` - -* **用法**:为了定制一个用于客户服务机器人的 `DialogAgent`: - -```python -from agentscope.agents import DialogAgent - -# Configuration for the 
DialogAgent -dialog_agent_config = { - "name": "ServiceBot", - "model_config_name": "gpt-3.5", # Specify the model used for dialogue generation - "sys_prompt": "Act as AI assistant to interact with the others. Try to " - "reponse on one line.\n", # Custom prompt for the agent - # Other configurations specific to the DialogAgent -} - -# Create and configure the DialogAgent -service_bot = DialogAgent(**dialog_agent_config) -``` - -### `UserAgent` - -* **回复方法**:这个方法通过提示内容以及在需要时附加的键和URL来处理用户输入。收集到的数据存储在agent记忆中的一个message对象里,用于记录或稍后使用,并返回该message作为响应。 - -```python -def reply( - self, - x: Optional[Union[Msg, Sequence[Msg]]] = None, - required_keys: Optional[Union[list[str], str]] = None, -) -> Msg: - # Check if there is initial data to be added to memory - if self.memory: - self.memory.add(x) - - content = input(f"{self.name}: ") # Prompt the user for input - kwargs = {} - - # Prompt for additional information based on the required keys - if required_keys is not None: - if isinstance(required_keys, str): - required_keys = [required_keys] - for key in required_keys: - kwargs[key] = input(f"{key}: ") - - # Optionally prompt for a URL if required - url = None - if self.require_url: - url = input("URL: ") - - # Create a message object with the collected input and additional details - msg = Msg(self.name, content=content, url=url, **kwargs) - - # Add the message object to memory - if self.memory: - self.memory.add(msg) - - return msg -``` - -* **用法**:配置一个 UserAgent 用于收集用户输入和URL(文件、图像、视频、音频或网站的URL): - -```python -from agentscope.agents import UserAgent - -# Configuration for UserAgent -user_agent_config = { - "name": "User", - "require_url": True, # If true, the agent will require a URL -} - -# Create and configure the UserAgent -user_proxy_agent = UserAgent(**user_agent_config) -``` - -[[返回顶部]](#201-agent-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/202-pipeline.md b/docs/sphinx_doc/zh_CN/source/tutorial/202-pipeline.md deleted file mode 100644 index 
5713c317a..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/202-pipeline.md +++ /dev/null @@ -1,302 +0,0 @@ -(202-pipeline-zh)= - -# Pipeline和MsgHub - -**Pipeline**和**Message Hub**主要用于描绘应用中信息的交换和传播过程,它们极大简化了Multi-Agent应用流程的编排工作。 -在本教程中,我们将详细的介绍Pipeline和Message Hub的原理和使用方式。 - -## Pipeline - -在AgentScope中,消息的交换、传播构成了Multi-Agent应用。但是对复杂应用来说,细致的描绘每一次信息交流对开发者来说是非常困难的。 -`Pipeline`主要用于简化“描述消息传播”的编程工作。 - -`Pipeline`中接收的对象是`Operator`,即信息的加工和传播单元(例如智能体`Agent`是`Operator -`的一个子类),而`Pipeline`自身也是`Operator`的子类。以下是所有`Pipeline`的基类: - -```python -class PipelineBase(Operator): - """所有pipelines的基础接口.""" - # ... [为简洁起见省略代码] - @abstractmethod - def __call__(self, x: Optional[dict] = None) -> dict: - """在这定义pipeline采取的操作。 - - Args: - x (Optional[`dict`], optional): - 对话历史以及一些环境信息。 - - Returns: - `dict`: 经过Pipeline处理后的返回消息。 - """ -``` - -### 类别 - -为了方便开发者的使用,对于同一功能的Pipeline,AgentScope提供了两种不同的实现策略: - -* **对象类型Pipeline** - - * 这些Pipeline是面向对象的,继承自 - `PipelineBase`。它们本身是`Operator`,可以与其他运算符组合以创建复杂的交互模式,并且可以复用。 - - ```python - # 实例化并调用 - pipeline = ClsPipeline([agent1, agent2, agent3]) - x = pipeline(x) - ``` - -* **函数式Pipeline** - - * 函数式Pipeline是独立的函数实现,在不需要复用的一次性使用场景中很有用。 - - ```python - # 只需要调用 - x = funcpipeline([agent1, agent2, agent3], x) - ``` - -Pipeline根据其功能被分类成以下的类型。下表概述了 AgentScope 中可用的不同 Pipeline: - -| 运算符类型Pipeline | 函数式Pipeline | 描述 | -| -------------------- | ------------------- | ------------------------------------------------------------ | -| `SequentialPipeline` | `sequentialpipeline` | 按顺序执行一系列运算符,将一个运算符的输出作为下一个运算符的输入。 | -| `IfElsePipeline` | `ifelsepipeline` | 实现条件逻辑,如果条件为真,则执行一个运算符;如果条件为假,则执行另一个运算符。 | -| `SwitchPipeline` | `switchpipeline` | 实现分支选择,根据条件的结果从映射集中执行一个运算符。 | -| `ForLoopPipeline` | `forlooppipeline` | 重复执行一个运算符,要么达到设定的迭代次数,要么直到满足指定的中止条件。 | -| `WhileLoopPipeline` | `whilelooppipeline` | 只要给定条件保持为真,就持续执行一个运算符。 | -| - | `placeholder` | 在流控制中不需要任何操作的分支,如 if-else/switch 中充当占位符。 | - -### 使用说明 - -本节通过比较有无 Pipeline 的情况下多智能体应用程序中逻辑实现的方式,来阐释 Pipeline 如何简化逻辑实现。 
-**注意:** 请注意,在下面提供的示例中,我们使用术语 `agent` 来代表任何可以作为 `Operator` 的实例。这是为了便于理解,并说明 Pipeline 是如何协调不同操作之间的交互的。您可以将 `agent` 替换为任何 `Operator`,从而在实践中允许 `agent` 和 `pipeline` 的混合使用。 - -#### `SequentialPipeline` - -* 不使用 pipeline: - - ```python - x = agent1(x) - x = agent2(x) - x = agent3(x) - ``` - -* 使用 pipeline: - - ```python - from agentscope.pipelines import SequentialPipeline - - pipe = SequentialPipeline([agent1, agent2, agent3]) - x = pipe(x) - ``` - -* 使用函数式 pipeline: - - ```python - from agentscope.pipelines import sequentialpipeline - - x = sequentialpipeline([agent1, agent2, agent3], x) - ``` - -#### `IfElsePipeline` - -* 不使用 pipeline: - - ```python - if condition(x): - x = agent1(x) - else: - x = agent2(x) - ``` - -* 使用 pipeline: - - ```python - from agentscope.pipelines import IfElsePipeline - - pipe = IfElsePipeline(condition, agent1, agent2) - x = pipe(x) - ``` - -* 使用函数式 pipeline: - - ```python - from agentscope.functional import ifelsepipeline - - x = ifelsepipeline(condition, agent1, agent2, x) - ``` - -#### `SwitchPipeline` - -* 不使用 pipeline: - - ```python - switch_result = condition(x) - if switch_result == case1: - x = agent1(x) - elif switch_result == case2: - x = agent2(x) - else: - x = default_agent(x) - ``` - -* 使用 pipeline: - - ```python - from agentscope.pipelines import SwitchPipeline - - case_operators = {case1: agent1, case2: agent2} - pipe = SwitchPipeline(condition, case_operators, default_agent) - x = pipe(x) - ``` - -* 使用函数式 pipeline: - - ```python - from agentscope.functional import switchpipeline - - case_operators = {case1: agent1, case2: agent2} - x = switchpipeline(condition, case_operators, default_agent, x) - ``` - -#### `ForLoopPipeline` - -* 不使用 pipeline: - - ```python - for i in range(max_iterations): - x = agent(x) - if break_condition(x): - break - ``` - -* 使用 pipeline: - - ```python - from agentscope.pipelines import ForLoopPipeline - - pipe = ForLoopPipeline(agent, max_iterations, break_condition) - x = pipe(x) - ``` - -* 使用函数式 
pipeline: - - ```python - from agentscope.functional import forlooppipeline - - x = forlooppipeline(agent, max_iterations, break_condition, x) - ``` - -#### `WhileLoopPipeline` - -* 不使用 pipeline: - - ```python - while condition(x): - x = agent(x) - ``` - -* 使用 pipeline: - - ```python - from agentscope.pipelines import WhileLoopPipeline - - pipe = WhileLoopPipeline(agent, condition) - x = pipe(x) - ``` - -* 使用函数式 pipeline: - - ```python - from agentscope.functional import whilelooppipeline - - x = whilelooppipeline(agent, condition, x) - ``` - -### Pipeline 组合 - -值得注意的是,AgentScope 支持组合 Pipeline 来创建复杂的交互。例如,我们可以创建一个 Pipeline,按顺序执行一系列智能体,然后执行另一个 Pipeline,根据条件执行一系列智能体。 - -```python -from agentscope.pipelines import SequentialPipeline, IfElsePipeline -# 创建一个按顺序执行智能体的 Pipeline -pipe1 = SequentialPipeline([agent1, agent2, agent3]) -# 创建一个条件执行智能体的 Pipeline -pipe2 = IfElsePipeline(condition, agent4, agent5) -# 创建一个按顺序执行 pipe1 和 pipe2 的 Pipeline -pipe3 = SequentialPipeline([pipe1, pipe2]) -# 调用 Pipeline -x = pipe3(x) -``` - -## MsgHub - -`MsgHub` 旨在管理一组智能体之间的对话/群聊,其中允许共享消息。通过 `MsgHub`,智能体可以使用 `broadcast` 向群组中的所有其他智能体广播消息。 - -以下是 `MsgHub` 的核心类: - -```python -class MsgHubManager: - """MsgHub 管理类,用于在一组智能体之间共享对话。""" - # ... [为简洁起见省略代码] - - def broadcast(self, msg: Union[dict, list[dict]]) -> None: - """将消息广播给所有参与者。""" - for agent in self.participants: - agent.observe(msg) - - def add(self, new_participant: Union[Sequence[AgentBase], AgentBase]) -> None: - """将新参与者加入此 hub""" - # ... [为简洁起见省略代码] - - def delete(self, participant: Union[Sequence[AgentBase], AgentBase]) -> None: - """从参与者中删除智能体。""" - # ... 
[为简洁起见省略代码] -``` - -### 使用说明 - -#### 创建一个 MsgHub - -要创建一个 `MsgHub`,请通过调用 `msghub` 辅助函数并传入参与智能体列表来实例化一个 `MsgHubManager`。此外,您可以提供一个可选的初始声明`announcement`,如果提供,将在初始化时广播给所有参与者。 - -```python -from agentscope.msg_hub import msghub - -# Initialize MsgHub with participating agents -hub_manager = msghub( - participants=[agent1, agent2, agent3], announcement=initial_announcement -) -``` - -#### 在 MsgHub 中广播消息 - -`MsgHubManager` 可以与上下文管理器一起使用,以处理`MsgHub`环境的搭建和关闭: - -```python -with msghub( - participants=[agent1, agent2, agent3], announcement=initial_announcement -) as hub: - # 智能体现在可以在这个块中广播和接收消息 - agent1() - agent2() - - # 或者手动广播一条消息 - hub.broadcast(some_message) - -``` - -退出上下文块时,`MsgHubManager` 会确保每个智能体的听众被清空,防止在中心环境之外的任何意外消息共享。 - -#### 添加和删除参与者 - -你可以动态地从 `MsgHub` 中添加或移除智能体: - -```python -# 添加一个新参与者 -hub.add(new_agent) - -# 移除一个现有的参与者 -hub.delete(existing_agent) -``` - -[[返回顶部]](#202-pipeline-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/203-model.md b/docs/sphinx_doc/zh_CN/source/tutorial/203-model.md deleted file mode 100644 index 8ce88f7e3..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/203-model.md +++ /dev/null @@ -1,650 +0,0 @@ -(203-model-zh)= - -# 模型 - -AgentScope中,模型的部署和调用是通过`ModelWrapper`来解耦开的,开发者可以通过提供模型配置(Model config)的方式指定模型,同时AgentScope也提供脚本支持开发者自定义模型服务。 - -## 支持模型 - -目前,AgentScope内置以下模型服务API的支持: - -- OpenAI API,包括对话(Chat),图片生成(DALL-E)和文本嵌入(Embedding)。 -- DashScope API,包括对话(Chat)和图片生成(Image Sythesis)和文本嵌入(Text Embedding)。 -- Gemini API,包括对话(Chat)和嵌入(Embedding)。 -- ZhipuAi API,包括对话(Chat)和嵌入(Embedding)。 -- Ollama API,包括对话(Chat),嵌入(Embedding)和生成(Generation)。 -- LiteLLM API, 包括对话(Chat), 支持各种模型的API. 
-- Post请求API,基于Post请求实现的模型推理服务,包括Huggingface/ModelScope - Inference API和各种符合Post请求格式的API。 -- Anthropic 对话 API。 - -## 配置方式 - -AgentScope中,用户通过`agentscope.init`接口中的`model_configs`参数来指定模型配置。 -`model_configs`可以是一个字典,或是一个字典的列表,抑或是一个指向模型配置文件的路径。 - -```python -import agentscope - -agentscope.init(model_configs=MODEL_CONFIG_OR_PATH) -``` - -其中`model_configs`的一个例子如下: - -```python -model_configs = [ - { - "config_name": "gpt-4-temperature-0.0", - "model_type": "openai_chat", - "model_name": "gpt-4", - "api_key": "xxx", - "organization": "xxx", - "generate_args": { - "temperature": 0.0 - } - }, - { - "config_name": "dall-e-3-size-1024x1024", - "model_type": "openai_dall_e", - "model_name": "dall-e-3", - "api_key": "xxx", - "organization": "xxx", - "generate_args": { - "size": "1024x1024" - } - }, - # 在这里可以配置额外的模型 -] -``` - -### 配置格式 - -AgentScope中,模型配置是一个字典,用于指定模型的类型以及设定调用参数。 -我们将模型配置中的字段分为_基础参数_和_调用参数_两类。 -其中,基础参数包括`config_name`和`model_type`两个基本字段,分别用于区分不同的模型配置和具 -体的`ModelWrapper`类型。 - -```python -{ - # 基础参数 - "config_name": "gpt-4-temperature-0.0", # 模型配置名称 - "model_type": "openai_chat", # 对应`ModelWrapper`类型 - - # 详细参数 - # ... -} -``` - -#### 基础参数 - -基础参数中,`config_name`是模型配置的标识,我们将在初始化智能体时用该字段指定使用的模型服务。 - -`model_type`对应了`ModelWrapper`的类型,用于指定模型服务的类型。对应源代码中`ModelWrapper -`类的`model_type`字段。 - -```python -class OpenAIChatWrapper(OpenAIWrapper): - """The model wrapper for OpenAI's chat API.""" - - model_type: str = "openai_chat" - # ... 
-``` - -在目前的AgentScope中,所支持的`model_type`类型,对应的`ModelWrapper`类,以及支持的 -API如下: - -| API | Task | Model Wrapper | `model_type` | Some Supported Models | -|------------------------|-----------------|---------------------------------------------------------------------------------------------------------------------------------|-------------------------------|--------------------------------------------------| -| OpenAI API | Chat | [`OpenAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_chat"` | gpt-4, gpt-3.5-turbo, ... | -| | Embedding | [`OpenAIEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_embedding"` | text-embedding-ada-002, ... | -| | DALL·E | [`OpenAIDALLEWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_dall_e"` | dall-e-2, dall-e-3 | -| DashScope API | Chat | [`DashScopeChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_chat"` | qwen-plus, qwen-max, ... | -| | Image Synthesis | [`DashScopeImageSynthesisWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_image_synthesis"` | wanx-v1 | -| | Text Embedding | [`DashScopeTextEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_text_embedding"` | text-embedding-v1, text-embedding-v2, ... | -| | Multimodal | [`DashScopeMultiModalWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_multimodal"` | qwen-vl-plus, qwen-vl-max, qwen-audio-turbo, ... | -| Gemini API | Chat | [`GeminiChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/gemini_model.py) | `"gemini_chat"` | gemini-pro, ... 
| -| | Embedding | [`GeminiEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/gemini_model.py) | `"gemini_embedding"` | models/embedding-001, ... | -| ZhipuAI API | Chat | [`ZhipuAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/zhipu_model.py) | `"zhipuai_chat"` | glm-4, ... | -| | Embedding | [`ZhipuAIEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/zhipu_model.py) | `"zhipuai_embedding"` | embedding-2, ... | -| ollama | Chat | [`OllamaChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_chat"` | llama2, ... | -| | Embedding | [`OllamaEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_embedding"` | llama2, ... | -| | Generation | [`OllamaGenerationWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_generate"` | llama2, ... | -| LiteLLM API | Chat | [`LiteLLMChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/litellm_model.py) | `"litellm_chat"` | - | -| Yi API | Chat | [`YiChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/yi_model.py) | `"yi_chat"` | yi-large, yi-medium, ... | -| Post Request based API | - | [`PostAPIModelWrapperBase`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | - | - | -| | Chat | [`PostAPIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | `"post_api_chat"` | meta-llama/Meta-Llama-3-8B-Instruct, ... 
| -| | Image Synthesis | [`PostAPIDALLEWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | `post_api_dall_e` | - | | -| | Embedding | [`PostAPIEmbeddingWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/post_model.py) | `post_api_embedding` | - | -| Anthropic API | Chat | [`AnthropicChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/anthropic_model.py) | `"anthropic_chat"` | claude-3-5-sonnet-20241022, ... | - -#### 详细参数 - -根据`ModelWrapper`的不同,详细参数中所包含的参数不同。 -但是所有的详细参数都会用于初始化`ModelWrapper`类的实例,因此,更详细的参数说明可以根据`ModelWrapper`类的构造函数来查看。 -下面展示了不同`ModelWrapper`对应的模型配置样例,用户可以修改这些样例以适应自己的需求。 - -##### OpenAI API - -
-OpenAI Chat API (agentscope.models.OpenAIChatWrapper) - -```python -{ - "config_name": "{your_config_name}", - "model_type": "openai_chat", - - # 必要参数 - "model_name": "gpt-4", - - # 可选参数 - "api_key": "{your_api_key}", # OpenAI API Key,如果没有提供,将从环境变量中读取 - "organization": "{your_organization}", # Organization name,如果没有提供,将从环境变量中读取 - "client_args": { # 用于初始化OpenAI API Client的参数 - # 例如:"max_retries": 3, - }, - "generate_args": { # 模型API接口被调用时传入的参数 - # 例如:"temperature": 0.0 - }, -} -``` - -
- -
-OpenAI DALL·E API (agentscope.models.OpenAIDALLEWrapper) - -```python -{ - "config_name": "{your_config_name}", - "model_type": "openai_dall_e", - - # 必要参数 - "model_name": "{model_name}", # OpenAI model name, 例如:dall-e-2, dall-e-3 - - # 可选参数 - "api_key": "{your_api_key}", # OpenAI API Key,如果没有提供,将从环境变量中读取 - "organization": "{your_organization}", # Organization name,如果没有提供,将从环境变量中读取 - "client_args": { # 用于初始化OpenAI API Client的参数 - # 例如:"max_retries": 3, - }, - "generate_args": { # 模型API接口被调用时传入的参数 - # 例如:"n": 1, "size": "512x512" - } -} -``` - -
- -
-OpenAI Embedding API (agentscope.models.OpenAIEmbeddingWrapper) - -```python -{ - "config_name": "{your_config_name}", - "model_type": "openai_embedding", - - # 必要参数 - "model_name": "{model_name}", # OpenAI model name, 例如:text-embedding-ada-002, text-embedding-3-small - - # 可选参数 - "api_key": "{your_api_key}", # OpenAI API Key,如果没有提供,将从环境变量中读取 - "organization": "{your_organization}", # Organization name,如果没有提供,将从环境变量中读取 - "client_args": { # 用于初始化OpenAI API Client的参数 - # 例如:"max_retries": 3, - }, - "generate_args": { # 模型API接口被调用时传入的参数 - # 例如:"encoding_format": "float" - } -} -``` - -
- -
- -#### DashScope API - -
-DashScope Chat API (agentscope.models.DashScopeChatWrapper) - -```python -{ - "config_name": "my_dashscope_chat_config", - "model_type": "dashscope_chat", - - # 必要参数 - "model_name": "{model_name}", # DashScope Chat API中的模型名, 例如:qwen-max - - # 可选参数 - "api_key": "{your_api_key}", # DashScope API Key,如果没有提供,将从环境变量中读取 - "generate_args": { - # 例如:"temperature": 0.5 - }, -} -``` - -
- -
-DashScope Image Synthesis API (agentscope.models.DashScopeImageSynthesisWrapper) - -```python -{ - "config_name": "my_dashscope_image_synthesis_config", - "model_type": "dashscope_image_synthesis", - - # 必要参数 - "model_name": "{model_name}", # DashScope Image Synthesis API中的模型名, 例如:wanx-v1 - - # 可选参数 - "api_key": "{your_api_key}", - "generate_args": { - "negative_prompt": "xxx", - "n": 1, - # ... - } -} -``` - -
- -
-DashScope Text Embedding API (agentscope.models.DashScopeTextEmbeddingWrapper) - -```python -{ - "config_name": "my_dashscope_text_embedding_config", - "model_type": "dashscope_text_embedding", - - # 必要参数 - "model_name": "{model_name}", # DashScope Text Embedding API中的模型名, 例如:text-embedding-v1 - - # 可选参数 - "api_key": "{your_api_key}", - "generate_args": { - # ... - }, -} -``` - -
- -
-DashScope Multimodal Conversation API (agentscope.models.DashScopeMultiModalWrapper) - -```python -{ - "config_name": "my_dashscope_multimodal_config", - "model_type": "dashscope_multimodal", - - # Required parameters - "model_name": "{model_name}", # The model name in DashScope Multimodal Conversation API, e.g. qwen-vl-plus - - # Optional parameters - "api_key": "{your_api_key}", - "generate_args": { - # ... - }, -} -``` - -
- -
- -#### Gemini API - -
-Gemini Chat API (agentscope.models.GeminiChatWrapper) - -```python -{ - "config_name": "my_gemini_chat_config", - "model_type": "gemini_chat", - - # 必要参数 - "model_name": "{model_name}", # Gemini Chat API中的模型名,例如:gemini-pro - - # 可选参数 - "api_key": "{your_api_key}", # 如果没有提供,将从环境变量GEMINI_API_KEY中读取 -} -``` - -
- -
-Gemini Embedding API (agentscope.models.GeminiEmbeddingWrapper) - -```python -{ - "config_name": "my_gemini_embedding_config", - "model_type": "gemini_embedding", - - # 必要参数 - "model_name": "{model_name}", # Gemini Embedding API中的模型名,例如:models/embedding-001 - - # 可选参数 - "api_key": "{your_api_key}", # 如果没有提供,将从环境变量GEMINI_API_KEY中读取 -} -``` - -
- -
- - -#### ZhipuAI API - -
-ZhipuAI Chat API (agentscope.models.ZhipuAIChatWrapper) - -```python -{ - "config_name": "my_zhipuai_chat_config", - "model_type": "zhipuai_chat", - - # Required parameters - "model_name": "{model_name}", # The model name in ZhipuAI API, e.g. glm-4 - - # Optional parameters - "api_key": "{your_api_key}" -} -``` - -
- -
-ZhipuAI Embedding API (agentscope.models.ZhipuAIEmbeddingWrapper) - -```python -{ - "config_name": "my_zhipuai_embedding_config", - "model_type": "zhipuai_embedding", - - # Required parameters - "model_name": "{model_name}", # The model name in ZhipuAI API, e.g. embedding-2 - - # Optional parameters - "api_key": "{your_api_key}", -} -``` - -
- -
- - -#### Ollama API - -
-Ollama Chat API (agentscope.models.OllamaChatWrapper) - -```python -{ - "config_name": "my_ollama_chat_config", - "model_type": "ollama_chat", - - # 必要参数 - "model_name": "{model_name}", # ollama Chat API中的模型名, 例如:llama2 - - # 可选参数 - "options": { # 模型API接口被调用时传入的参数 - # 例如:"temperature": 0., "seed": 123, - }, - "keep_alive": "5m", # 控制一次调用后模型在内存中的存活时间 -} -``` - -
- -
-Ollama Generation API (agentscope.models.OllamaGenerationWrapper) - -```python -{ - "config_name": "my_ollama_generate_config", - "model_type": "ollama_generate", - - # 必要参数 - "model_name": "{model_name}", # ollama Generate API, 例如:llama2 - - # 可选参数 - "options": { # 模型API接口被调用时传入的参数 - # "temperature": 0., "seed": 123, - }, - "keep_alive": "5m", # 控制一次调用后模型在内存中的存活时间 -} -``` - -
- -
-Ollama Embedding API (agentscope.models.OllamaEmbeddingWrapper) - -```python -{ - "config_name": "my_ollama_embedding_config", - "model_type": "ollama_embedding", - - # 必要参数 - "model_name": "{model_name}", # ollama Embedding API, 例如:llama2 - - # 可选参数 - "options": { # 模型API接口被调用时传入的参数 - # "temperature": 0., "seed": 123, - }, - "keep_alive": "5m", # 控制一次调用后模型在内存中的存活时间 -} -``` - -
- -
- - -#### LiteLLM Chat API - -
-LiteLLM Chat API (agentscope.models.LiteLLMChatWrapper) - -```python -{ - "config_name": "lite_llm_openai_chat_gpt-3.5-turbo", - "model_type": "litellm_chat", - "model_name": "gpt-3.5-turbo" # You should note that for different models, you should set the corresponding environment variables, such as OPENAI_API_KEY, etc. You may refer to https://docs.litellm.ai/docs/ for this. -}, -``` - -
- -
- - -#### Post Request API - -
-Post Request Chat API (agentscope.models.PostAPIChatWrapper) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_chat", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` -> ⚠️ Post Request Chat model wrapper (`PostAPIChatWrapper`) 有以下特性: -> 1) 它的 `.format()` 方法会确保输入的信息(messages)会被转换成字典列表(a list of dict). -> 2) 它的 `._parse_response()` 方法假设了生成的文字内容会在 `response["data"]["response"]["choices"][0]["message"]["content"]` - -
- - -
-Post Request Image Synthesis API (agentscope.models.PostAPIDALLEWrapper) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_dall_e", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` -> ⚠️ Post Request Image Synthesis model wrapper (`PostAPIDALLEWrapper`) 有以下特性: -> 1) 它的 `._parse_response()` 方法假设生成的图片都以url的形式表示在`response["data"]["response"]["data"][i]["url"]` - - -
- -
-Post Request Embedding API (agentscope.models.PostAPIEmbeddingWrapper) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_embedding", - - # Required parameters - "api_url": "https://xxx.xxx", - "headers": { - # e.g. "Authorization": "Bearer xxx", - }, - - # Optional parameters - "messages_key": "messages", -} -``` - -> ⚠️ Post Request Embedding model wrapper (`PostAPIEmbeddingWrapper`) 有以下特性: -> 1) 它的 `._parse_response()`方法假设生成的特征向量会存放在 `response["data"]["response"]["data"][i]["embedding"]` - -
- -
-Post Request API (agentscope.models.PostAPIModelWrapperBase) - -```python -{ - "config_name": "my_postapiwrapper_config", - "model_type": "post_api_chat", - - # 必要参数 - "api_url": "https://xxx.xxx", - "headers": { - # 例如:"Authorization": "Bearer xxx", - }, - - # 可选参数 - "messages_key": "messages", -} -``` -> ⚠️ Post request model wrapper (`PostAPIModelWrapperBase`) 返回原生的 HTTP 响应值, 且没有实现 `.format()`. 当运行样例时,推荐使用 `Post Request Chat API`. -> 使用`PostAPIModelWrapperBase`时,需要注意 -> 1) `.format()` 方法不能被调用; -> 2) 或开发者希望实现自己的`.format()`和/或`._parse_response()` - -
- - -
- -#### Anthropic API - -
- -Anthropic Chat API (agentscope.models.AnthropicChatWrapper) - - -```python -{ - "config_name": "my_anthropic_chat_config", - "model_type": "anthropic_chat", - "model_name": "claude-3-5-sonnet-20241022", - - # 必要参数 - "api_key": "{your_api_key}", - - # 可选参数 - "temperature": 0.5 -} -``` -
- -
- -## 从零搭建模型服务 - -针对需要自己搭建模型服务的开发者,AgentScope提供了一些脚本来帮助开发者快速搭建模型服务。您可以在[scripts](https://github.com/modelscope/agentscope/tree/main/scripts)目录下找到这些脚本以及说明。 - -具体而言,AgentScope提供了以下模型服务的脚本: - -- [CPU推理引擎ollama](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#ollama) -- [基于Flask + Transformers的模型服务](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#with-transformers-library) -- [基于Flask + ModelScope的模型服务](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#with-modelscope-library) -- [FastChat推理引擎](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#fastchat) -- [vllm推理引擎](https://github.com/modelscope/agentscope/blob/main/scripts/README.md#vllm) - -关于如何快速启动这些模型服务,用户可以参考[scripts](https://github.com/modelscope/agentscope/blob/main/scripts/)目录下的[README.md](https://github.com/modelscope/agentscope/blob/main/scripts/README.md)文件。 - -## 创建自己的Model Wrapper - -AgentScope允许开发者自定义自己的模型包装器。新的模型包装器类应该 - -- 继承自`ModelWrapperBase`类, -- 提供`model_type`字段以在模型配置中标识这个Model Wrapper类,并 -- 实现`__init__`和`__call__`函数。 -- 调用`agentscope.register_model_wrapper_class`函数,将其注册到AgentScope中。 - -```python -from agentscope.models import ModelWrapperBase - - -class MyModelWrapper(ModelWrapperBase): - model_type: str = "my_model" - - def __init__(self, config_name, my_arg1, my_arg2, **kwargs): - # 初始化模型实例 - super().__init__(config_name=config_name) - # ... - - def __call__(self, input, **kwargs) -> str: - # 调用模型实例 - # ... -``` - -然后调用`register_model_wrapper_class`函数将其注册到AgentScope中。 - -```python -import agentscope - -agentscope.register_model_wrapper_class(MyModelWrapper) - -my_model_config = { - # 基础参数 - "config_name": "my_model_config", - "model_type": "my_model", - - # 详细参数 - "my_arg1": "xxx", - "my_arg2": "yyy", - # ... 
-} -``` - -[[返回顶部]](#203-model-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/203-parser.md b/docs/sphinx_doc/zh_CN/source/tutorial/203-parser.md deleted file mode 100644 index 8ea13231c..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/203-parser.md +++ /dev/null @@ -1,534 +0,0 @@ -(203-parser-zh)= - -# 结果解析 - -## 目录 - -- [背景](#背景) -- [解析器模块](#解析器模块) - - [功能说明](#功能说明) - - [字符串类型](#字符串str类型) - - [MarkdownCodeBlockParser](#markdowncodeblockparser) - - [初始化](#初始化) - - [响应格式模版](#响应格式模版) - - [解析函数](#解析函数) - - [字典类型](#字典类型) - - [关于 DictFilterMixin](#关于-dictfiltermixin) - - [解析器](#解析器) - - [RegexTaggedContentParser](#regextaggedcontentparser) - - [初始化](#初始化) - - [MarkdownJsonDictParser](#markdownjsondictparser) - - [初始化 & 响应格式模版](#初始化--响应格式模版) - - [类型校验](#类型校验) - - [MultiTaggedContentParser](#multitaggedcontentparser) - - [初始化 & 响应格式模版](#初始化--响应格式模版-1) - - [解析函数](#解析函数-1) - - [JSON / Python 对象类型](#json--python-对象类型) - - [MarkdownJsonObjectParser](#markdownjsonobjectparser) - - [初始化 & 响应格式模版](#初始化--响应格式模版-2) - - [解析函数](#解析函数-2) -- [典型使用样例](#典型使用样例) - - [狼人杀游戏](#狼人杀游戏) - - [ReAct 智能体和工具使用](#react-智能体和工具使用) -- [自定义解析器](#自定义解析器) - - -## 背景 - -利用LLM构建应用的过程中,将 LLM 产生的字符串解析成指定的格式,提取出需要的信息,是一个非常重要的环节。 -但同时由于下列原因,这个过程也是一个非常复杂的过程: - -1. **多样性**:解析的目标格式多种多样,需要提取的信息可能是一段特定文本,一个JSON对象,或者是一个复杂的数据结构。 -2. **复杂性**:结果解析不仅仅是将 LLM 产生的文本转换成目标格式,还涉及到提示工程(提醒 LLM 应该产生什么格式的输出),错误处理等一些列问题。 -3. **灵活性**:同一个应用中,不同阶段也可能需要智能体产生不同格式的输出。 - -为了让开发者能够便捷、灵活的地进行结果解析,AgentScope设计并提供了解析器模块(Parser)。利用该模块,开发者可以通过简单的配置,实现目标格式的解析,同时可以灵活的切换解析的目标格式。 - -AgentScope中,解析器模块的设计原则是: -1. **灵活**:开发者可以灵活设置所需返回格式、灵活地切换解析器,实现不同格式的解析,而无需修改智能体类的代码,即具体的“目标格式”与智能体类内`reply`函数的处理逻辑解耦 -2. **自由**:用户可以自由选择是否使用解析器。解析器所提供的响应格式提示、解析结果等功能都是在`reply`函数内显式调用的,用户可以自由选择使用解析器或是自己实现代码实现结果解析 -3. **透明**:利用解析器时,提示(prompt)构建的过程和结果在`reply`函数内对开发者完全可见且透明,开发者可以精确调试自己的应用。 - -## 解析器模块 - -### 功能说明 - -解析器模块(Parser)的主要功能包括: - -1. 
提供“响应格式说明”(format instruction),即提示 LLM 应该在什么位置产生什么输出,例如 - -```` -You should generate python code in a fenced code block as follows -```python -{your_python_code} -``` -```` - - -2. 提供解析函数(parse function),直接将 LLM 产生的文本解析成目标数据格式 - -3. 针对字典格式的后处理功能。在将文本解析成字典后,其中不同的字段可能有不同的用处 - -AgentScope提供了多种不同解析器,开发者可以根据自己的需求进行选择。 - -| 目标格式 | 解析器 | 说明 | -|-------------------|----------------------------|-----------------------------------------------------------------------------| -| 字符串(`str`)类型 | `MarkdownCodeBlockParser` | 要求 LLM 将指定的文本生成到Markdown中以 ``` 标识的代码块中,解析结果为字符串。 | -| 字典(`dict`)类型 | `MarkdownJsonDictParser` | 要求 LLM 在 \```json 和 \``` 标识的代码块中产生指定内容的字典,解析结果为 Python 字典。 | -| | `MultiTaggedContentParser` | 要求 LLM 在多个标签中产生指定内容,这些不同标签中的内容将一同被解析成一个 Python 字典,并填入不同的键值对中。 | -| | `RegexTaggedContentParser` | 适用于不确定标签名,不确定标签数量的场景。允许用户修改正则表达式,返回结果为字典。 | -| JSON / Python对象类型 | `MarkdownJsonObjectParser` | 要求 LLM 在 \```json 和 \``` 标识的代码块中产生指定的内容,解析结果将通过 `json.loads` 转换成 Python 对象。 | - -> **NOTE**: 相比`MarkdownJsonDictParser`,`MultiTaggedContentParser`更适合于模型能力不强,以及需要 LLM 返回内容过于复杂的情况。例如 LLM 返回 Python 代码,如果直接在字典中返回代码,那么 LLM 需要注意特殊字符的转义(\t,\n,...),`json.loads`读取时对双引号和单引号的区分等问题。而`MultiTaggedContentParser`实际是让大模型在每个单独的标签中返回各个键值,然后再将它们组成字典,从而降低了LLM返回的难度。 - -> **NOTE**:AgentScope 内置的响应格式说明并不一定是最优的选择。在 AgentScope 中,开发者可以完全控制提示构建的过程,因此,选择不使用parser中内置的相应格式说明,而是自定义新的相应格式说明,或是实现新的parser类都是可行的技术方案。 - -下面我们将根据不同的目标格式,介绍这些解析器的用法。 - -### 字符串(`str`)类型 - -#### MarkdownCodeBlockParser - -##### 初始化 - -- `MarkdownCodeBlockParser`采用 Markdown 代码块的形式,要求 LLM 将指定文本产生到指定的代码块中。可以通过`language_name`参数指定不同的语言,从而利用大模型代码能力产生对应的输出。例如要求大模型产生 Python 代码时,初始化如下: - - ```python - from agentscope.parsers import MarkdownCodeBlockParser - - parser = MarkdownCodeBlockParser(language_name="python", content_hint="your python code") - ``` - -##### 响应格式模版 - -- `MarkdownCodeBlockParser`类提供如下的“响应格式说明”模版,在用户调用`format_instruction`属性时,会将`{language_name}`替换为初始化时输入的字符串: - - ```` - You should generate {language_name} code in a {language_name} 
fenced code block as follows: - ```{language_name} - {content_hint} - ``` - ```` - -- 例如上述对`language_name`为`"python"`的初始化,调用`format_instruction`属性时,会返回如下字符串: - - ```python - print(parser.format_instruction) - ``` - - ```` - You should generate python code in a python fenced code block as follows - ```python - your python code - ``` - ```` - -##### 解析函数 - -- `MarkdownCodeBlockParser`类提供`parse`方法,用于解析LLM产生的文本,返回的是字符串。 - - ````python - res = parser.parse( - ModelResponse( - text="""The following is generated python code - ```python - print("Hello world!") - ``` - """ - ) - ) - - print(res.parsed) - ```` - - ``` - print("hello world!") - ``` - -### 字典类型 - -#### 关于 DictFilterMixin - -与字符串和一般的 JSON / Python 对象不同,作为 LLM 应用中常用的数据格式,AgentScope 通过 [`DictFilterMixin`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/parsers/parser_base.py#L77) 类为字典类型的解析提供后处理功能。 - -初始化解析器时,可以通过额外设置`keys_to_content`,`keys_to_memory`,`keys_to_metadata`三个参数,从而实现在调用`parser`的`to_content`,`to_memory`和`to_metadata`方法时,对字典键值对的过滤。 -其中 - - `keys_to_content` 指定的键值对将被放置在返回`Msg`对象中的`content`字段,这个字段内容将会被返回给其它智能体,参与到其他智能体的提示构建中,同时也会被`self.speak`函数调用,用于显式输出 - - `keys_to_memory` 指定的键值对将被存储到智能体的记忆中 - - `keys_to_metadata` 指定的键值对将被放置在`Msg`对象的`metadata`字段,可以用于应用的控制流程判断,或挂载一些不需要返回给其它智能体的信息。 - -三个参数接收布尔值、字符串和字符串列表。其值的含义如下: -- `False`: 对应的过滤函数将返回`None`。 -- `True`: 整个字典将被返回。 -- `str`: 对应的键值将被直接返回,注意返回的会是对应的值而非字典。 -- `List[str]`: 根据键值对列表返回过滤后的字典。 - -AgentScope中,`keys_to_content` 和 `keys_to_memory` 默认为 `True`,即整个字典将被返回。`keys_to_metadata` 默认为 `False`,即对应的过滤函数将返回 `None`。 - -下面是狼人杀游戏的样例,在白天讨论过程中 LLM 扮演狼人产生的字典。在这个例子中, -- `"thought"`字段不应该返回给其它智能体,但是应该存储在智能体的记忆中,从而保证狼人策略的延续; -- `"speak"`字段应该被返回给其它智能体,并且存储在智能体记忆中; -- `"finish_discussion"`字段用于应用的控制流程中,判断讨论是否已经结束。为了节省token,该字段不应该被返回给其它的智能体,同时也不应该存储在智能体的记忆中。 - - ```python - { - "thought": "The others didn't realize I was a werewolf. 
I should end the discussion soon.", - "speak": "I agree with you.", - "finish_discussion": True - } - ``` - -AgentScope中,我们通过调用`to_content`,`to_memory`和`to_metadata`方法实现后处理功能,示意代码如下: - -- 应用中的控制流代码,创建对应的解析器对象并装载 - - ```python - from agentscope.parsers import MarkdownJsonDictParser - - # ... - - agent = DictDialogAgent(...) - - # 以MarkdownJsonDictParser为例 - parser = MarkdownJsonDictParser( - content_hint={ - "thought": "what you thought", - "speak": "what you speak", - "finish_discussion": "whether the discussion is finished" - }, - keys_to_content="speak", - keys_to_memory=["thought", "speak"], - keys_to_metadata=["finish_discussion"] - ) - - # 装载解析器,即相当于指定了要求的相应格式 - agent.set_parser(parser) - - # 讨论过程 - while True: - # ... - x = agent(x) - # 根据metadata字段,获取LLM对当前是否应该结束讨论的判断 - if x.metadata["finish_discussion"]: - break - ``` - - -- 智能体内部`reply`函数内实现字典的过滤 - - ```python - # ... - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - - # ... - res = self.model(prompt, parse_func=self.parser.parse) - - # 过滤后拥有 thought 和 speak 字段的字典,存储到智能体记忆中 - self.memory.add( - Msg( - self.name, - content=self.parser.to_memory(res.parsed), - role="assistant", - ) - ) - - # 存储到content中,同时存储到metadata中 - msg = Msg( - self.name, - content=self.parser.to_content(res.parsed), - role="assistant", - metadata=self.parser.to_metadata(res.parsed), - ) - self.speak(msg) - - return msg - ``` - - - - -> **Note**: `keys_to_content`,`keys_to_memory`和`keys_to_metadata`参数可以是列表,字符串,也可以是布尔值。 -> - 如果是`True`,则会直接返回整个字典,即不进行过滤 -> - 如果是`False`,则会直接返回`None`值 -> - 如果是字符串类型,则`to_content`,`to_memory`和`to_metadata`方法将会把字符串对应的键值直接放入到对应的位置,例如`keys_to_content="speak"`,则`to_content`方法将会把`res.parsed["speak"]`放入到`Msg`对象的`content`字段中,`content`字段会是字符串而不是字典。 -> - 如果是列表类型,则`to_content`,`to_memory`和`to_metadata`方法实现的将是过滤功能,对应过滤后的结果是字典 -> ```python -> parser = MarkdownJsonDictParser( -> content_hint={ -> "thought": "what you thought", -> "speak": "what you speak", -> }, -> keys_to_content="speak", -> 
keys_to_memory=["thought", "speak"], -> ) -> -> example_dict = {"thought": "abc", "speak": "def"} -> print(parser.to_content(example_dict)) # def -> print(parser.to_memory(example_dict)) # {"thought": "abc", "speak": "def"} -> print(parser.to_metadata(example_dict)) # None -> ``` -> ``` -> def -> {"thought": "abc", "speak": "def"} -> None -> ``` - -#### 解析器 - -针对字典类型的返回值,AgentScope 提供了多种不同的解析器,开发者可以根据自己的需求进行选择。 - -##### RegexTaggedContentParser - -###### 初始化 - -`RegexTaggedContentParser` 主要用于1)不确定的标签名,以及2)不确定标签数量的场景。在这种情况下,该解析器无法提供一个泛用性广的响应格式说明,因此需要开发者在初始化时提供对应的相应格式说明(`format_instruction`)。 -除此之外,用户可以通过设置`try_parse_json`,`required_keys`等参数,设置解析器的行为。 - -```python -from agentscope.parsers import RegexTaggedContentParser - -parser = RegexTaggedContentParser( - format_instruction="""Respond with specific tags as outlined below -what you thought -what you speak -""", - try_parse_json=True, # 尝试将标签内容解析成 JSON 对象 - required_keys=["thought", "speak"] # 必须包含的键 -) -``` - -##### MarkdownJsonDictParser - -###### 初始化 & 响应格式模版 - -- `MarkdownJsonDictParser`要求 LLM 在 \```json 和 \``` 标识的代码块中产生指定内容的字典。 -- 除了`to_content`,`to_memory`和`to_metadata`参数外,可以通过提供 `content_hint` 参数提供响应结果样例和说明,即提示LLM应该产生什么样子的字典,该参数可以是字符串,也可以是字典,在构建响应格式提示的时候将会被自动转换成字符串进行拼接。 - - ```python - from agentscope.parsers import MarkdownJsonDictParser - - # 字典 - MarkdownJsonDictParser( - content_hint={ - "thought": "what you thought", - "speak": "what you speak", - } - ) - # 或字符串 - MarkdownJsonDictParser( - content_hint="""{ - "thought": "what you thought", - "speak": "what you speak", - }""" - ) - ``` - - 对应的`instruction_format`属性 - - ```` - You should respond a json object in a json fenced code block as follows: - ```json - {content_hint} - ``` - ```` - -###### 类型校验 - -`MarkdownJsonDictParser`中的`content_hint`参数还支持基于Pydantic的类型校验。初始化时,可以将`content_hint`设置为一个Pydantic的模型类,AgentScope将根据这个类来修改`instruction_format`属性,并且利用Pydantic在解析时对LLM返回的字典进行类型校验。 -该功能需要LLM能够理解JSON schema格式的提示,因此适用于能力较强的大模型。 - 
-一个简单的例子如下,`"..."`处可以填写具体的类型校验规则,可以参考[Pydantic](https://docs.pydantic.dev/latest/)文档。 - - ```python - from pydantic import BaseModel, Field - from agentscope.parsers import MarkdownJsonDictParser - - class Schema(BaseModel): - thought: str = Field(..., description="what you thought") - speak: str = Field(..., description="what you speak") - end_discussion: bool = Field(..., description="whether the discussion is finished") - - parser = MarkdownJsonDictParser(content_hint=Schema) - ``` - -- 对应的`format_instruction`属性 - -```` -Respond a JSON dictionary in a markdown's fenced code block as follows: -```json -{a_JSON_dictionary} -``` -The generated JSON dictionary MUST follow this schema: -{'properties': {'speak': {'description': 'what you speak', 'title': 'Speak', 'type': 'string'}, 'thought': {'description': 'what you thought', 'title': 'Thought', 'type': 'string'}, 'end_discussion': {'description': 'whether the discussion is finished', 'title': 'End Discussion', 'type': 'boolean'}}, 'required': ['speak', 'thought', 'end_discussion'], 'title': 'Schema', 'type': 'object'} -```` - -- 同时在解析的过程中,也将使用Pydantic进行类型校验,校验错误将抛出异常。同时,Pydantic也将提供一定的容错处理能力,例如将字符串`"true"`转换成Python的`True`: - -```` -parser.parse(""" -```json -{ - "thought": "The others didn't realize I was a werewolf. 
I should end the discussion soon.", - "speak": "I agree with you.", - "end_discussion": "true" -} -``` -""") -```` - -##### MultiTaggedContentParser - -`MultiTaggedContentParser`要求 LLM 在多个指定的标签对中产生指定的内容,这些不同标签的内容将一同被解析为一个 Python 字典。使用方法与`MarkdownJsonDictParser`类似,只是初始化方法不同,更适合能力较弱的LLM,或是比较复杂的返回内容。 - -###### 初始化 & 响应格式模版 - -`MultiTaggedContentParser`中,每一组标签将会以`TaggedContent`对象的形式传入,其中`TaggedContent`对象包含了 -- 标签名(`name`),即返回字典中的key值 -- 开始标签(`tag_begin`) -- 标签内容提示(`content_hint`) -- 结束标签(`tag_end`) -- 内容解析功能(`parse_json`),默认为`False`。当置为`True`时,将在响应格式提示中自动添加提示,并且提取出的内容将通过`json.loads`解析成 Python 对象 - -```python -from agentscope.parsers import MultiTaggedContentParser, TaggedContent -parser = MultiTaggedContentParser( - TaggedContent( - name="thought", - tag_begin="[THOUGHT]", - content_hint="what you thought", - tag_end="[/THOUGHT]" - ), - TaggedContent( - name="speak", - tag_begin="[SPEAK]", - content_hint="what you speak", - tag_end="[/SPEAK]" - ), - TaggedContent( - name="finish_discussion", - tag_begin="[FINISH_DISCUSSION]", - content_hint="true/false, whether the discussion is finished", - tag_end="[/FINISH_DISCUSSION]", - parse_json=True, # 我们希望这个字段的内容直接被解析成 True 或 False 的 Python 布尔值 - ) -) - -print(parser.format_instruction) -``` - -``` -Respond with specific tags as outlined below, and the content between [FINISH_DISCUSSION] and [/FINISH_DISCUSSION] MUST be a JSON object: -[THOUGHT]what you thought[/THOUGHT] -[SPEAK]what you speak[/SPEAK] -[FINISH_DISCUSSION]true/false, whether the discussion is finished[/FINISH_DISCUSSION] -``` - -###### 解析函数 - -- `MultiTaggedContentParser`的解析结果为字典,其中key为`TaggedContent`对象的`name`的值,以下是狼人杀中解析 LLM 返回的样例: - -```python -res_dict = parser.parse( - ModelResponse(text="""As a werewolf, I should keep pretending to be a villager -[THOUGHT]The others didn't realize I was a werewolf. 
I should end the discussion soon.[/THOUGHT] -[SPEAK]I agree with you.[/SPEAK] -[FINISH_DISCUSSION]true[/FINISH_DISCUSSION] -""" - ) -) - -print(res_dict) -``` - -``` -{ - "thought": "The others didn't realize I was a werewolf. I should end the discussion soon.", - "speak": "I agree with you.", - "finish_discussion": true -} -``` - -### JSON / Python 对象类型 - -#### MarkdownJsonObjectParser - -`MarkdownJsonObjectParser`同样采用 Markdown 的\```json和\```标识,但是不限制解析的内容的类型,可以是列表,字典,数值,字符串等可以通过`json.loads`进行解析的字符串。 - -##### 初始化 & 响应格式模版 - -```python -from agentscope.parsers import MarkdownJsonObjectParser - -parser = MarkdownJsonObjectParser( - content_hint="{A list of numbers.}" -) - -print(parser.format_instruction) -``` - -```` -You should respond a json object in a json fenced code block as follows: -```json -{A list of numbers.} -``` -```` - -##### 解析函数 - -````python -res = parser.parse( - ModelResponse(text="""Yes, here is the generated list -```json -[1,2,3,4,5] -``` -""" - ) -) - -print(type(res)) -print(res) -```` - -``` -<class 'list'> -[1, 2, 3, 4, 5] -``` - -## 典型使用样例 - -### 狼人杀游戏 - -狼人杀(Werewolf)是字典解析器的一个经典使用场景,在游戏的不同阶段内,需要同一个智能体在不同阶段产生除了`"thought"`和`"speak"`外其它的标识字段,例如是否结束讨论,预言家是否使用能力,女巫是否使用解药和毒药,投票等。 - -AgentScope中已经内置了[狼人杀](https://github.com/modelscope/agentscope/tree/main/examples/game_werewolf)的样例,该样例采用`DictDialogAgent`类,配合不同的解析器,实现了灵活的目标格式切换。同时利用解析器的后处理功能,实现了“想”与“说”的分离,同时控制游戏流程的推进。 -详细实现请参考狼人杀[源码](https://github.com/modelscope/agentscope/tree/main/examples/game_werewolf)。 - -### ReAct 智能体和工具使用 - -`ReActAgent`是AgentScope中为了工具使用构建的智能体类,基于 ReAct 算法进行搭建,可以配合不同的工具函数进行使用。其中工具的调用,格式解析,采用了和解析器同样的实现思路。详细实现请参考[代码](https://github.com/modelscope/agentscope/blob/main/src/agentscope/agents/react_agent.py)。 - - -## 自定义解析器 - -AgentScope中提供了解析器的基类`ParserBase`,开发者可以通过继承该基类,并实现其中的`format_instruction`属性和`parse`方法来实现自己的解析器。 - -针对目标格式是字典类型的解析,可以额外继承`agentscope.parsers.DictFilterMixin`类实现对字典类型的后处理。 - -```python -from abc import ABC, abstractmethod - -from agentscope.models import ModelResponse - - 
-class ParserBase(ABC): - """The base class for model response parser.""" - - format_instruction: str - """The instruction for the response format.""" - - @abstractmethod - def parse(self, response: ModelResponse) -> ModelResponse: - """Parse the response text to a specific object, and stored in the - parsed field of the response object.""" - - # ... -``` diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/203-stream.md b/docs/sphinx_doc/zh_CN/source/tutorial/203-stream.md deleted file mode 100644 index 82df56371..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/203-stream.md +++ /dev/null @@ -1,121 +0,0 @@ -(203-stream-zh)= - -# 流式输出 - -AgentScope 支持在**终端**和 **AgentScope Studio** 中使用以下大模型 API 的流式输出模式。 - -| API | Model Wrapper | 对应的 `model_type` 域 | -|--------------------|---------------------------------------------------------------------------------------------------------------------------------|--------------------| -| OpenAI Chat API | [`OpenAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/openai_model.py) | `"openai_chat"` | -| DashScope Chat API | [`DashScopeChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/dashscope_model.py) | `"dashscope_chat"` | -| Gemini Chat API | [`GeminiChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/gemini_model.py) | `"gemini_chat"` | -| ZhipuAI Chat API | [`ZhipuAIChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/zhipu_model.py) | `"zhipuai_chat"` | -| ollama Chat API | [`OllamaChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/ollama_model.py) | `"ollama_chat"` | -| LiteLLM Chat API | [`LiteLLMChatWrapper`](https://github.com/modelscope/agentscope/blob/main/src/agentscope/models/litellm_model.py) | `"litellm_chat"` | - - -## 设置流式输出 - -AgentScope 允许用户在模型配置和模型调用中设置流式输出模式。 - -### 模型配置 - -在模型配置中将 `stream` 字段设置为 `True` 以使用流式输出模式。 - -```python 
-model_config = { - "config_name": "xxx", - "model_type": "xxx", - "stream": True, - # ... -} -``` - -### 模型调用 - -在智能体中,可以在调用模型时将 `stream` 参数设置为 `True`。注意,模型调用中的 `stream` 参数将覆盖模型配置中的 `stream` 字段。 - -```python -class MyAgent(AgentBase): - # ... - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # ... - response = self.model( - prompt, - stream=True, - ) - # ... -``` - -## 流式打印 - -在流式输出模式下,模型响应的 `stream` 字段将是一个生成器,而 `text` 字段将是 `None`。 -为了与非流式兼容,用户一旦在迭代生成器前访问 `text` 字段,`stream` 中的生成器将被迭代以生成完整的文本,并将其存储在 `text` 字段中。 -因此,即使在流式输出模式下,用户也可以像往常一样在 `text` 字段中处理响应文本而无需任何改变。 - -但是,如果用户需要流式的输出,只需要将生成器放在 `self.speak` 函数中,以在终端和 AgentScope Studio 中流式打印文本。 - -```python - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # ... - # 如果想在调用时使用流式打印,在这里调用时使用 stream=True - response = self.model(prompt) - - # 程序运行到这里时,response.text 为 None - - # 在 terminal 和 AgentScope Studio 中流式打印文本 - self.speak(response.stream) - - # 生成器被迭代时,产生的文本将自动被存储在 response.text 中,因此用户可以直接使用 response.text 处理响应文本 - msg = Msg(self.name, content=response.text, role="assistant") - - self.memory.add(msg) - - return msg - -``` - -## 进阶用法 - -如果用户想要自己处理流式输出,可以通过迭代生成器来实时获得流式的响应文本。 - -An example of how to handle the streaming response is in the `speak` function of `AgentBase` as follows. -关于如何处理流式输出,可以参考 `AgentBase` 中的 `speak` 函数。 -The `log_stream_msg` function will print the streaming response in the terminal and AgentScope Studio (if registered). -其中 `log_stream_msg` 函数将在终端和 AgentScope Studio 中实时地流式打印文本。 - -```python - # ... - elif isinstance(content, GeneratorType): - # 流式消息必须共享相同的 id 才能在 AgentScope Studio 中显示,因此这里通过同一条消息切换 content 字段来实现 - msg = Msg(name=self.name, content="", role="assistant") - for last, text_chunk in content: - msg.content = text_chunk - log_stream_msg(msg, last=last) - else: - # ... -``` - -在处理生成器的时候,用户应该记住以下几点: - -1. 在迭代生成器时,`response.text` 字段将自动包含已迭代的文本。 -2. `stream` 字段中的生成器将生成一个布尔值和字符串的二元组。布尔值表示当前是否是最后一段文本,而字符串则是到目前为止的响应文本。 -3. 
AgentScope Studio 依据 `log_stream_msg` 函数中输入的 `Msg` 对象的 id 判断文本是否属于同一条流式响应,若 id 不同,则会被视为不同的响应。 - - -```python - def reply(self, x: Optional[Msg, Sequence[Msg]] = None) -> Msg: - # ... - response = self.model(prompt) - - # 程序运行到这里时,response.text 为 None - - # 迭代生成器,自己处理响应文本 - for last_chunk, text in response.stream: - # 按照自己的需求处理响应文本 - # ... - - -``` - -[[Return to the top]](#203-stream-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md deleted file mode 100644 index 88afc655b..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md +++ /dev/null @@ -1,312 +0,0 @@ -(204-service-zh)= - -# 工具 - -服务函数(Service function)是可以增强智能体能力工具,例如执行Python代码、网络搜索、 -文件操作等。本教程概述了AgentScope中可用的服务功能,同时介绍如何使用它们来增强智能体的能力。 - -## Service函数概览 - -下面的表格按照类型概述了各种Service函数。以下函数可以通过`agentscope.service.{函数名}`进行调用。 - -| Service场景 | Service函数名称 | 描述 | -|------------|---------------------------------------|--------------------------------------------------------------------| -| 代码 | `execute_python_code` | 执行一段 Python 代码,可选择在 Docker 容器内部执行。 | -| | `NoteBookExecutor` | 在 NoteBookExecutor 的 IPython 环境中执行一段 Python 代码,遵循 IPython 交互式计算风格。 | -| 检索 | `retrieve_from_list` | 根据给定的标准从列表中检索特定项目。 | -| | `cos_sim` | 计算2个embedding的余弦相似度。 | -| SQL查询 | `query_mysql` | 在 MySQL 数据库上执行 SQL 查询并返回结果。 | -| | `query_sqlite` | 在 SQLite 数据库上执行 SQL 查询并返回结果。 | -| | `query_mongodb` | 对 MongoDB 集合执行查询或操作。 | -| 文本处理 | `summarization` | 使用大型语言模型总结一段文字以突出其主要要点。 | -| 网络 | `bing_search` | 使用bing搜索。 | -| | `google_search` | 使用google搜索。 | -| | `arxiv_search` | 使用arxiv搜索。 | -| | `download_from_url` | 从指定的 URL 下载文件。 | -| | `load_web` | 爬取并解析指定的网页链接 (目前仅支持爬取 HTML 页面) | -| | `digest_webpage` | 对已经爬取好的网页生成摘要信息(目前仅支持 HTML 页面) | -| | `dblp_search_publications` | 在dblp数据库里搜索文献。 | -| | `dblp_search_authors` | 在dblp数据库里搜索作者。 | -| | `dblp_search_venues` | 在dblp数据库里搜索期刊,会议及研讨会。 | -| | `tripadvisor_search` | 使用 TripAdvisor API 搜索位置。 | -| | 
`tripadvisor_search_location_photos` | 使用 TripAdvisor API 检索特定位置的照片。 | -| | `tripadvisor_search_location_details` | 使用 TripAdvisor API 获取特定位置的详细信息。 | -| 文件处理 | `create_file` | 在指定路径创建一个新文件,并可选择添加初始内容。 | -| | `delete_file` | 删除由文件路径指定的文件。 | -| | `move_file` | 将文件从一个路径移动或重命名到另一个路径。 | -| | `create_directory` | 在指定路径创建一个新的目录。 | -| | `delete_directory` | 删除一个目录及其所有内容。 | -| | `move_directory` | 将目录从一个路径移动或重命名到另一个路径。 | -| | `read_text_file` | 读取并返回文本文件的内容。 | -| | `write_text_file` | 向指定路径的文件写入文本内容。 | -| | `read_json_file` | 读取并解析 JSON 文件的内容。 | -| | `write_json_file` | 将 Python 对象序列化为 JSON 并写入到文件。 | -| 多模态 | `dashscope_text_to_image` | 使用 DashScope API 将文本生成图片。 | -| | `dashscope_image_to_text` | 使用 DashScope API 根据图片生成文字。 | -| | `dashscope_text_to_audio` | 使用 DashScope API 根据文本生成音频。 | -| | `openai_text_to_image` | 使用 OpenAI API根据文本生成图片。 | -| | `openai_edit_image` | 使用 OpenAI API 根据提供的遮罩和提示编辑图像。 | -| | `openai_create_image_variation` | 使用 OpenAI API 创建图像的变体。 | -| | `openai_image_to_text` | 使用 OpenAI API 根据图片生成文字。 | -| | `openai_text_to_audio` | 使用 OpenAI API 根据文本生成音频。 | -| | `openai_audio_to_text` | 使用OpenAI API将音频转换为文本。 | -| *更多服务即将推出* | | 正在开发更多服务功能,并将添加到 AgentScope 以进一步增强其能力。 | - -关于详细的参数、预期输入格式、返回类型,请参阅[API文档](https://modelscope.github.io/agentscope/)。 - -## 使用Service函数 - -AgentScope为Service函数提供了两个服务类,分别是`ServiceToolkit`和`ServiceResponse`。 - -### 关于ServiceToolkit - -大模型使用工具函数通常涉及以下5个步骤: - -1. **准备工具函数**。即开发者通过提供必要的参数(例如api key、用户名、密码等)将工具函数预处理成大模型能直接调用的形式。 -2. **为大模型准备工具描述**。即一份详细的函数功能描述,以便大模型能够正确理解工具函数。 -3. **约定函数调用格式**。提供一份说明来告诉大模型如何调用工具函数,即调用格式。 -4. **解析大模型返回值**。从大模型获取返回值之后,需要按照第三步中的调用格式来解析字符串。 -5. **调用函数并处理异常**。实际调用函数,返回结果,并处理异常。 - -为了简化上述步骤并提高复用性,AgentScope引入了ServiceToolkit模块。它可以 -- 注册python函数为工具函数 -- 生成字符串和JSON schema格式的工具函数说明 -- 内置一套工具函数的调用格式 -- 解析模型响应、调用工具功能,并处理异常 - - -#### 如何使用 - -按照以下步骤使用ServiceToolkit: - -1. 
初始化一个ServiceToolkit对象并注册服务函数及其必要参数。例如,以下Bing搜索功能。 - -```python -def bing_search( - question: str, - api_key: str, - num_results: int = 10, - **kwargs: Any, -) -> ServiceResponse: - """ - Search question in Bing Search API and return the searching results - - Args: - question (`str`): - The search query string. - api_key (`str`): - The API key provided for authenticating with the Bing Search API. - num_results (`int`, defaults to `10`): - The number of search results to return. - **kwargs (`Any`): - Additional keyword arguments to be included in the search query. - For more details, please refer to - https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/query-parameters - - [omitted for brevity] - """ -``` -We register the function in a `ServiceToolkit` object by providing `api_key` and `num_results` as necessary parameters. -我们通过提供`api_key`和`num_results`作为必要参数,在`ServiceToolkit`对象中注册bing_search函数。 - -```python -from agentscope.service import ServiceToolkit - -service_toolkit = ServiceToolkit() - -service_toolkit.add( - bing_search, - api_key="xxx", - num_results=3 -) -``` - -2. 在提示中使用`tools_instruction`属性指导LLM,或使用`json_schemas`属性获取JSON schema格式的说明,以构建自定义格式的函数说明或直接在模型API中使用(例如OpenAI Chat API)。 - - -````text ->> print(service_toolkit.tools_instruction) -## Tool Functions: -The following tool functions are available in the format of -``` -{index}. {function name}: {function description} -{argument1 name} ({argument type}): {argument description} -{argument2 name} ({argument type}): {argument description} -... -``` - -1. bing_search: Search question in Bing Search API and return the searching results - question (str): The search query string. 
-```` -````text ->> print(service_toolkit.json_schemas) -{ - "bing_search": { - "type": "function", - "function": { - "name": "bing_search", - "description": "Search question in Bing Search API and return the searching results", - "parameters": { - "type": "object", - "properties": { - "question": { - "type": "string", - "description": "The search query string." - } - }, - "required": [ - "question" - ] - } - } - } -} -```` - -3. 通过`tools_calling_format`属性指导LLM如何使用工具函数。ServiceToolkit中默认大模型 -需要返回一个JSON格式的列表,列表中包含若干个字典,每个字典即为一个函数调用。必须包含name和 -arguments两个字段,其中name为函数名,arguments为函数参数。arguments键值对应的值是从 -“参数名”映射到“参数值”的字典。 - -```text ->> print(service_toolkit.tools_calling_format) -[{"name": "{function name}", "arguments": {"{argument1 name}": xxx, "{argument2 name}": xxx}}] -``` - -4. 通过`parse_and_call_func`方法解析大模型生成的字符串,并调用函数。此函数可以接收字符串或解析后符合格式要求的字典作为输入。 -- 当输入为字符串时,此函数将相应地解析字符串并使用解析后的参数执行函数。 -- 而如果输入为解析后的字典,则直接调用函数。 - - -```python -# a string input -string_input = '[{"name": "bing_search", "arguments": {"question": "xxx"}}]' -res_of_string_input = service_toolkit.parse_and_call_func(string_input) - -# or a parsed dictionary -dict_input = [{"name": "bing_search", "arguments": {"question": "xxx"}}] -# res_of_dict_input is the same as res_of_string_input -res_of_dict_input = service_toolkit.parse_and_call_func(dict_input) - -print(res_of_string_input) -``` -``` -1. Execute function bing_search - [ARGUMENTS]: - question: xxx - [STATUS]: SUCCESS - [RESULT]: ... -``` - -关于ServiceToolkit的具体使用样例,可以参考`agentscope.agents`中`ReActAgent`类。 - -#### 创建新的Service函数 - -新的Service函数必须满足以下要求才能被ServiceToolkit正常使用: -1. 具有格式化的函数说明(推荐Google风格),以便ServiceToolkit提取函数说明。 -2. 函数名称应该是自解释的,这样智能体可以理解函数并正确使用它。 -3. 在定义函数时应提供参数的类型(例如`def func(a: int, b: str, c: bool)`),以便大模型 -能够给出类型正确的参数。 - - -### 关于ServiceResponse - -`ServiceResponse`是对调用的结果的封装,包含了`status`和`content`两个字段。 -当Service函数正常运行结束时,`status`为`ServiceExecStatus. -SUCCESS`,`content`为函数的返回值。而当运行出现错误时,`status`为`ServiceExecStatus. 
-ERROR`,`content`内为错误信息。 - -```python -class ServiceResponse(dict): - """Used to wrap the execution results of the services""" - - __setattr__ = dict.__setitem__ - __getattr__ = dict.__getitem__ - - def __init__( - self, - status: ServiceExecStatus, - content: Any, - ): - """Constructor of ServiceResponse - - Args: - status (`ServiceExecStatus`): - The execution status of the service. - content (`Any`): - If the argument `status` is `SUCCESS`, `content` is the - response. We use `object` here to support various objects, - e.g. str, dict, image, video, etc. - Otherwise, `content` is the error message. - """ - self.status = status - self.content = content - - # ... [为简洁起见省略代码] - -``` - -## 示例 - -```python -import json -import inspect -from agentscope.service import ServiceResponse -from agentscope.agents import AgentBase - - -def create_file(file_path: str, content: str = "") -> ServiceResponse: - """ - 创建文件并向其中写入内容。 - - Args: - file_path (str): 将要创建文件的路径。 - content (str): 要写入文件的内容。 - - Returns: - ServiceResponse: 其中布尔值指示成功与否,字符串包含任何错误消息(如果有),包括错误类型。 - """ - # ... [为简洁起见省略代码] - - -class YourAgent(AgentBase): - # ... [为简洁起见省略代码] - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - # ... 
[为简洁起见省略代码] - - # 构造提示,让代理提供 JSON 格式的参数 - prompt = ( - f"To complete the user request\n```{x['content']}```\n" - "Please provide the necessary parameters in JSON format for the " - "function:\n" - f"Function: {create_file.__name__}\n" - "Description: Create a file and write content to it.\n" - ) - - # 添加关于函数参数的详细信息 - sig = inspect.signature(create_file) - parameters = sig.parameters.items() - params_prompt = "\n".join( - f"- {name} ({param.annotation.__name__}): " - f"{'(default: ' + json.dumps(param.default) + ')'if param.default is not inspect.Parameter.empty else ''}" - for name, param in parameters - ) - prompt += params_prompt - - # 获取模型响应 - model_response = self.model(prompt).text - - # 解析模型响应并调用 create_file 函数 - # 可能需要额外的提取函数 - try: - kwargs = json.loads(model_response) - create_file(**kwargs) - except: - # 错误处理 - pass - - # ... [为简洁起见省略代码] -``` - -[[返回顶部]](#204-service-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/205-memory.md b/docs/sphinx_doc/zh_CN/source/tutorial/205-memory.md deleted file mode 100644 index 25b20de64..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/205-memory.md +++ /dev/null @@ -1,214 +0,0 @@ -(205-memory-zh)= - -# 记忆 - -AgentScope中,记忆(memory)用于存储历史消息,从而使智能体能够根据上下文提供更加连贯,更加 -自然的响应。 -本教程将首先介绍memory中信息的载体,消息(message),然后介绍AgentScope中记忆模块的功能 -和使用方法。 - -## 关于消息(Message) - -### 消息基类(`MessageBase`) - -AgentScope中,消息基类是Python字典的子类,由`name`,`content`两个必选字段和一个可选的字段 -`url`组成。由于是字典类型,开发者也可以根据需要增加其他字段。 -其中,`name`字段代表消息的发起者,`content`字段代表消息的内容,`url -`则代表消息中附加的数据链接,可以是指向多模态数据的本地链接,也可以是网络链接。 -当一个消息被创建时,将会自动创建一个唯一的ID,用于标识这条消息。同时,消息的创建时间也会以 -时间戳的形式自动记录下来。 - -具体实现中,AgentScope首先提供了一个`MessageBase`基类,用于定义消息的基本属性和使用方法。 -与一般的字典类型不同,`MessageBase`的实例化对象可以通过`对象名.{属性名}`的方式访问属性值, -也可以通过`对象名['属性名']`的方式访问属性值。 -其中,`MessageBase` 类的关键属性如下: - -- **`name`**:该属性表示信息的发起者。这是一项关键的元数据,在需要区分不同发言者的场景中非常有用。 -- **`content`**:信息本身的内容。它可以包括文本、结构化数据或其他与交互相关且需要智能体处理的内容形式。 -- **`url`**:可选属性,允许信息链接到外部资源。这些可以是指向文件的直接链接、多模态数据或网页。 -- **`timestamp`**:时间戳,显示信息创建的时间。 -- 
**`id`**:每条信息在创建时都会被分配一个唯一标识符(ID)。 - -```python -class MessageBase(dict): - """Base Message class, which is used to maintain information for dialog, - memory and used to construct prompt. - """ - - def __init__( - self, - name: str, - content: Any, - url: Optional[Union[Sequence[str], str]] = None, - timestamp: Optional[str] = None, - **kwargs: Any, - ) -> None: - """Initialize the message object - - Args: - name (`str`): - The name of who send the message. It's often used in - role-playing scenario to tell the name of the sender. - However, you can also only use `role` when calling openai api. - The usage of `name` refers to - https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models. - content (`Any`): - The content of the message. - url (`Optional[Union[list[str], str]]`, defaults to None): - A url to file, image, video, audio or website. - timestamp (`Optional[str]`, defaults to None): - The timestamp of the message, if None, it will be set to - current time. - **kwargs (`Any`): - Other attributes of the message. For OpenAI API, you should - add "role" from `["system", "user", "assistant", "function"]`. - When calling OpenAI API, `"role": "assistant"` will be added - to the messages that don't have "role" attribute. 
- - """ - # id and timestamp will be added to the object as its attributes - # rather than items in dict - self.id = uuid4().hex - if timestamp is None: - self.timestamp = _get_timestamp() - else: - self.timestamp = timestamp - - self.name = name - self.content = content - - if url: - self.url = url - - self.update(kwargs) - - def __getattr__(self, key: Any) -> Any: - try: - return self[key] - except KeyError as e: - raise AttributeError(f"no attribute '{key}'") from e - - def __setattr__(self, key: Any, value: Any) -> None: - self[key] = value - - def __delattr__(self, key: Any) -> None: - try: - del self[key] - except KeyError as e: - raise AttributeError(f"no attribute '{key}'") from e - - def to_str(self) -> str: - """Return the string representation of the message""" - raise NotImplementedError - - def serialize(self) -> str: - """Return the serialized message.""" - raise NotImplementedError - - # ... [省略代码以简化] -``` - -### 消息类(`Msg`) - -`Msg`类是AgentScope中最常用的消息类。它继承了 `MessageBase`类,并实现了`to_str` 和 -`serialize` 抽象方法。 -在一个Agent类中,其`reply`函数通常会返回一个`Msg`类的实例,以便在AgentScope中进行消息的 -传递。 - -```python -class Msg(MessageBase): - """The Message class.""" - - def __init__( - self, - name: str, - content: Any, - url: Optional[Union[Sequence[str], str]] = None, - timestamp: Optional[str] = None, - echo: bool = False, - **kwargs: Any, - ) -> None: - super().__init__( - name=name, - content=content, - url=url, - timestamp=timestamp, - **kwargs, - ) - if echo: - logger.chat(self) - - def to_str(self) -> str: - """Return the string representation of the message""" - return f"{self.name}: {self.content}" - - def serialize(self) -> str: - return json.dumps({"__type": "Msg", **self}) -``` - -## 关于记忆(Memory) - -### 关于记忆基类(`MemoryBase`) - -`MemoryBase` 是一个抽象类,以结构化的方式处理智能体的记忆。它定义了存储、检索、删除和操作 -*信息*内容的操作。 - -```python -class MemoryBase(ABC): - # ... 
[省略代码以简化] - - def get_memory( - self, - return_type: PromptType = PromptType.LIST, - recent_n: Optional[int] = None, - filter_func: Optional[Callable[[int, dict], bool]] = None, - ) -> Union[list, str]: - raise NotImplementedError - - def add(self, memories: Union[list[dict], dict]) -> None: - raise NotImplementedError - - def delete(self, index: Union[Iterable, int]) -> None: - raise NotImplementedError - - def load( - self, - memories: Union[str, dict, list], - overwrite: bool = False, - ) -> None: - raise NotImplementedError - - def export( - self, - to_mem: bool = False, - file_path: Optional[str] = None, - ) -> Optional[list]: - raise NotImplementedError - - def clear(self) -> None: - raise NotImplementedError - - def size(self) -> int: - raise NotImplementedError -``` - -以下是 `MemoryBase` 的关键方法: - -- **`get_memory`**:这个方法负责从智能体的记忆中检索存储的信息。它可以按照 `return_type` 指定的格式返回这些信息。该方法还可以在提供 `recent_n` 时检索特定数量的最近信息,并且可以应用过滤函数( `filter_func` )来根据自定义标准选择信息。 -- **`add`**:这个方法用于将新的信息添加到智能体的记忆中。它可以接受单个信息或信息列表。每条信息通常是 `MessageBase` 或其子类的实例。 -- **`delete`**:此方法允许通过信息的索引(如果提供可迭代对象,则为索引集合)从记忆中删除信息。 -- **`load`**:这个方法允许从外部来源批量加载信息到智能体的内存中。`overwrite` 参数决定是否在加载新的信息集之前清除现有记忆。 -- **`export`**:这个方法便于将存储的*信息*从智能体的记忆中导出,要么导出到一个外部文件(由 `file_path` 指定),要么直接导入到程序的运行内存中(如果 `to_mem` 设置为 `True` )。 -- **`clear`**:这个方法清除智能体记忆中的所有*信息*,本质上是重置。 -- **`size`**:这个方法返回当前存储在智能体记忆中的信息数量。 - -### 关于`TemporaryMemory` - -`TemporaryMemory` 类是 `MemoryBase` 类的一个具体实现,提供了一个智能体运行期间存在的记忆存储,被用作智能体的默认记忆类型。除了 `MemoryBase` 的所有行为外,`TemporaryMemory` 还提供了检索的方法: - -- **`retrieve_by_embedding`**:基于它们的嵌入向量 (embeddings) 检索与查询最相似的 `messages`。它使用提供的度量标准来确定相关性,并可以返回前 `k` 个最相关的信息。 -- **`get_embeddings`**:返回记忆中所有信息的嵌入向量。如果信息没有嵌入向量,并且提供了嵌入模型,它将生成并存储信息的嵌入向量。 - -有关 `Memory` 和 `Msg` 使用的更多细节,请参考 API 文档。 - -[[返回顶端]](#205-memory-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/206-prompt.md b/docs/sphinx_doc/zh_CN/source/tutorial/206-prompt.md deleted file mode 100644 index 1008214ff..000000000 --- 
a/docs/sphinx_doc/zh_CN/source/tutorial/206-prompt.md +++ /dev/null @@ -1,488 +0,0 @@ -(206-prompt-zh)= - -# 提示工程 - -提示工程是与大型语言模型(LLMs)相关的应用中至关重要的组件。然而,为大型语言模型(LLMs)制作提示可能具有挑战性,尤其是在面对来自不同模型API的不同需求时。 - -为了帮助开发者更好地适应不同模型API的需求,AgentScope提供了一种结构化的方式来组织不同数据类型(例如指令、提示、对话历史)到所需的格式。 - -请注意这里不存在一个“**适用于所有模型API**”的提示构建方案。 -AgentScope内置策略的目标是**使初学者能够顺利调用模型API ,而不是使应用达到最佳效果**。对于进阶用户,我们强烈建议开发者根据自己的需求和模型API的要求自定义提示。 - -## 构建提示面临的挑战 - -在多智能体应用中,LLM通常在对话中扮演不同的角色。当使用模型的Chat API时,时常会面临以下挑战: - -1. 大多数Chat类型的模型API是为聊天机器人场景设计的,`role`字段只支持`"user"`和`"assistant"`,不支持`name`字段,即API本身不支持角色扮演。 - -2. 一些模型API要求`"user"`和`"assistant"`必须交替发言,而`"user"`必须在输入消息列表的开头和结尾发言。这样的要求使得在一个代理可能扮演多个不同角色并连续发言时,构建多智能体对话变得困难。 - -为了帮助初学者快速开始使用AgentScope,我们为大多数与聊天和生成相关的模型API提供了以下内置策略。 - -## 内置提示策略 - -AgentScope为以下的模型API提供了内置的提示构建策略。 - -- [OpenAIChatWrapper](#openaichatwrapper) -- [DashScopeChatWrapper](#dashscopechatwrapper) -- [DashScopeMultiModalWrapper](#dashscopemultimodalwrapper) -- [OllamaChatWrapper](#ollamachatwrapper) -- [OllamaGenerationWrapper](#ollamagenerationwrapper) -- [GeminiChatWrapper](#geminichatwrapper) -- [ZhipuAIChatWrapper](#zhipuaichatwrapper) - -这些策略是在对应Model Wrapper类的`format`函数中实现的。它接受`Msg`对象,`Msg`对象的列表或它们的混合作为输入。在`format`函数将会把输入重新组织成一个`Msg`对象的列表,因此为了方便解释,我们在下面的章节中认为`format`函数的输入是`Msg`对象的列表。 - -### `OpenAIChatWrapper` - -`OpenAIChatWrapper`封装了OpenAI聊天API,它以字典列表作为输入,其中字典必须遵循以下规则(更新于2024/03/22): - -- 需要`role`和`content`字段,以及一个可选的`name`字段。 -- `role`字段必须是`"system"`、`"user"`或`"assistant"`之一。 - -#### 提示的构建策略 - -##### 非视觉(Vision)模型 - -在OpenAI Chat API中,`name`字段使模型能够区分对话中的不同发言者。因此,`OpenAIChatWrapper`中`format`函数的策略很简单: - -- `Msg`: 直接将带有`role`、`content`和`name`字段的字典传递给API。 -- `List`: 根据上述规则解析列表中的每个元素。 - -样例如下: - -```python -from agentscope.models import OpenAIChatWrapper -from agentscope.message import Msg - -model = OpenAIChatWrapper( - config_name="", # 我们直接初始化model wrapper,因此不需要填入config_name - model_name="gpt-4", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", 
role="system"), # Msg对象 - [ # Msg对象的列表 - Msg(name="Bob", content="Hi.", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) -print(prompt) -``` - -```bash -[ - {"role": "system", "name": "system", "content": "You are a helpful assistant"}, - {"role": "assistant", "name": "Bob", "content": "Hi."}, - {"role": "assistant", "name": "Alice", "content": "Nice to meet you!"), -] -``` - -##### 视觉(Vision)模型 - -对支持视觉的模型而言,如果输入消息包含图像url,生成的`content`字段将是一个字典的列表,其中包含文本和图像url。 - -具体来说,如果是网络图片url,将直接传递给OpenAI Chat API,而本地图片url将被转换为base64格式。更多细节请参考[官方指南](https://platform.openai.com/docs/guides/vision)。 - -注意无效的图片url(例如`/Users/xxx/test.mp3`)将被忽略。 - -```python -from agentscope.models import OpenAIChatWrapper -from agentscope.message import Msg - -model = OpenAIChatWrapper( - config_name="", # 为空,因为我们直接初始化model wrapper - model_name="gpt-4o", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg 对象 - [ # Msg 对象的列表 - Msg(name="user", content="Describe this image", role="user", url="https://xxx.png"), - Msg(name="user", content="And these images", role="user", url=["/Users/xxx/test.png", "/Users/xxx/test.mp3"]), - ], -) -print(prompt) -``` - -```python -[ - { - "role": "system", - "name": "system", - "content": "You are a helpful assistant" - }, - { - "role": "user", - "name": "user", - "content": [ - { - "type": "text", - "text": "Describe this image" - }, - { - "type": "image_url", - "image_url": { - "url": "https://xxx.png" - } - }, - ] - }, - { - "role": "user", - "name": "user", - "content": [ - { - "type": "text", - "text": "And these images" - }, - { - "type": "image_url", - "image_url": { - "url": "..." 
# 对应 /Users/xxx/test.png - } - }, - ] - }, -] -``` - -### `DashScopeChatWrapper` - -`DashScopeChatWrapper`封装了DashScope聊天API,它接受消息列表作为输入。消息必须遵守以下规则: - -- 需要`role`和`content`字段,以及一个可选的`name`字段。 -- `role`字段必须是`"user"`,`"system"`或`"assistant"`之一。 -- 如果一条信息的`role`字段是`"system"`,那么这条信息必须也只能出现在消息列表的开头。 -- `user`和`assistant`必须交替发言。 - -#### 提示的构建策略 - -如果第一条消息的`role`字段是`"system"`,它将被转换为一条消息,其中`role`字段为`"system"`,`content`字段为系统消息。其余的消息将被转换为一条消息,其中`role`字段为`"user"`,`content`字段为对话历史。 - -样例如下: - -```python -from agentscope.models import DashScopeChatWrapper -from agentscope.message import Msg - -model = DashScopeChatWrapper( - config_name="", # 我们直接初始化model wrapper,因此不需要填入config_name - model_name="qwen-max", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg对象 - [ # Msg对象的列表 - Msg(name="Bob", content="Hi!", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) -print(prompt) -``` - -```bash -[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "## Conversation History\nBob: Hi!\nAlice: Nice to meet you!"}, -] -``` - -### `DashScopeMultiModalWrapper` - -`DashScopeMultiModalWrapper`封装了多模态模型的API,它接受消息列表作为输入,并且必须遵循以下的规则(更新于2024/04/04): - -- 每个消息是一个字段,并且包含`role`和`content`字段。 - - 其中`role`字段取值必须是`"user"`,`"system"`,`"assistant"`之一。 - - `content`字段对应的值必须是字典的列表 - - 每个字典只包含`text`,`image`或`audio`中的一个键值对 - - `text`域对应的值是一个字符串,表示文本内容 - - `image`域对应的值是一个字符串,表示图片的url - - `audio`域对应的值是一个字符串,表示音频的url - - `content`中可以同时包含多个key为`image`的字典或者多个key为`audio`的字典。例如 -```python -[ - { - "role": "user", - "content": [ - {"text": "What's the difference between these two pictures?"}, - {"image": "https://xxx1.png"}, - {"image": "https://xxx2.png"} - ] - }, - { - "role": "assistant", - "content": [{"text": "The first picture is a cat, and the second picture is a dog."}] - }, - { - "role": "user", - "content": [{"text": "I see, thanks!"}] - } -] -``` -- 
如果一条信息的`role`字段是`"system"`,那么这条信息必须也只能出现在消息列表的开头。 -- 消息列表中最后一条消息的`role`字段必须是`"user"`。 -- 消息列表中`user`和`assistant`必须交替发言。 - -#### 提示的构建策略 - -基于上述API的限制,构建策略如下: -- 如果输入的消息列表中第一条消息的`role`字段的值是`"system"`,它将被转换为一条系统消息,其中`role`字段为`"system"`,`content`字段为系统消息,如果输入`Msg`对象中`url`属性不为`None`,则根据其类型在`content`中增加一个键值为`"image"`或者`"audio"`的字典。 -- 其余的消息将被转换为一条消息,其中`role`字段为`"user"`,`content`字段为对话历史。并且所有`Msg`对象中`url`属性不为`None`的消息,都会根据`url`指向的文件类型在`content`中增加一个键值为`"image"`或者`"audio"`的字典。 - -样例如下: - -```python -from agentscope.models import DashScopeMultiModalWrapper -from agentscope.message import Msg - -model = DashScopeMultiModalWrapper( - config_name="", # 我们直接初始化model wrapper,因此不需要填入config_name - model_name="qwen-vl-plus", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system", url="url_to_png1"), # Msg对象 - [ # Msg对象的列表 - Msg(name="Bob", content="Hi!", role="assistant", url="url_to_png2"), - Msg(name="Alice", content="Nice to meet you!", role="assistant", url="url_to_png3"), - ], -) -print(prompt) -``` - -```bash -[ - { - "role": "system", - "content": [ - {"text": "You are a helpful assistant"}, - {"image": "url_to_png1"} - ] - }, - { - "role": "user", - "content": [ - {"text": "## Conversation History\nBob: Hi!\nAlice: Nice to meet you!"}, - {"image": "url_to_png2"}, - {"image": "url_to_png3"}, - ] - } -] -``` - -### LiteLLMChatWrapper - -`LiteLLMChatWrapper`封装了litellm聊天API,它接受消息列表作为输入。Litellm支持不同类型的模型,每个模型可能需要遵守不同的格式。为了简化使用,我们提供了一种与大多数模型兼容的格式。如果需要更特定的格式,您可以参考您所使用的特定模型以及[litellm](https://github.com/BerriAI/litellm)文档,来定制适合您模型的格式函数。 -- 格式化聊天历史中的所有消息,将其整合成一个以`"user"`作为`role`的单一消息 -#### 提示策略 -- 消息将包括对话历史,`user`消息由系统消息(system message)和"## Dialog History"前缀。 - - -```python -from agentscope.models import LiteLLMChatWrapper - -model = LiteLLMChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="gpt-3.5-turbo", -) - -prompt = model.format( - Msg("system", "You are a helpful assistant", role="system"), - [ - 
Msg("user", "What is the weather today?", role="user"), - Msg("assistant", "It is sunny today", role="assistant"), - ], -) - -print(prompt) -``` - -```bash -[ - { - "role": "user", - "content": ( - "You are a helpful assistant\n\n" - "## Conversation History\nuser: What is the weather today?\n" - "assistant: It is sunny today" - ), - }, -] -``` - -### `OllamaChatWrapper` - -`OllamaChatWrapper`封装了Ollama聊天API,它接受消息列表作为输入。消息必须遵守以下规则(更新于2024/03/22): - -- 需要`role`和`content`字段,并且`role`必须是`"user"`、`"system"`或`"assistant"`之一。 -- 可以添加一个可选的`images`字段到消息中。 - -#### 提示的构建策略 - -给定一个消息列表,我们将按照以下规则解析每个消息: - -- 如果输入的第一条信息的`role`字段是`"system"`,该条信息将被视为系统提示(system - prompt),其他信息将一起组成对话历史。对话历史将添加`"## Conversation History"`的前缀,并与 -系统提示一起组成一条`role`为`"system"`的信息。 -- 如果输入信息中的`url`字段不为`None`,则这些url将一起被置于`"images"`对应的键值中。 - -```python -from agentscope.models import OllamaChatWrapper - -model = OllamaChatWrapper( - config_name="", # 我们直接初始化model wrapper,因此不需要填入config_name - model_name="llama2", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg对象 - [ # Msg对象的列表 - Msg(name="Bob", content="Hi.", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant", url="https://example.com/image.jpg"), - ], -) - -print(prompt) -``` - -```bash -[ - { - "role": "system", - "content": "You are a helpful assistant\n\n## Conversation History\nBob: Hi.\nAlice: Nice to meet you!", - "images": ["https://example.com/image.jpg"] - }, -] -``` - -### `OllamaGenerationWrapper` - -`OllamaGenerationWrapper`封装了Ollama生成API,它接受字符串提示作为输入,没有任何约束(更新于2024/03/22)。 - -#### 提示的构建策略 - -如果第一条消息的`role`字段是`"system"`,那么它将会被转化成一条系统提示。其余消息会被拼接成对话历史。 - -```python -from agentscope.models import OllamaGenerationWrapper -from agentscope.message import Msg - -model = OllamaGenerationWrapper( - config_name="", # 我们直接初始化model wrapper,因此不需要填入config_name - model_name="llama2", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg对象 
- [ # Msg对象的列表 - Msg(name="Bob", content="Hi.", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) - -print(prompt) -``` - -```bash -You are a helpful assistant - -## Conversation History -Bob: Hi. -Alice: Nice to meet you! -``` - -### `GeminiChatWrapper` - -`GeminiChatWrapper`封装了Gemini聊天API,它接受消息列表或字符串提示作为输入。与DashScope聊天API类似,如果我们传递消息列表,它必须遵守以下规则: - -- 需要`role`和`parts`字段。`role`必须是`"user"`或`"model"`之一,`parts`必须是字符串列表。 -- `user`和`model`必须交替发言。 -- `user`必须在输入消息列表的开头和结尾发言。 - -当代理可能扮演多种不同角色并连续发言时,这些要求使得构建多代理对话变得困难。 -因此,我们决定在内置的`format`函数中将消息列表转换为字符串提示,并且封装在一条user信息中。 - -#### 提示的构建策略 - -如果第一条消息的`role`字段是`"system"`,那么它将会被转化成一条系统提示。其余消息会被拼接成对话历史。 - -**注意**Gemini Chat API中`parts`字段可以包含图片的url,由于我们将消息转换成字符串格式 -的输入,因此图片url在目前的`format`函数中是不支持的。 -我们推荐开发者可以根据需求动手定制化自己的提示。 - -```python -from agentscope.models import GeminiChatWrapper -from agentscope.message import Msg - -model = GeminiChatWrapper( - config_name="", # 我们直接初始化model wrapper,因此不需要填入config_name - model_name="gemini-pro", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg对象 - [ # Msg对象的列表 - Msg(name="Bob", content="Hi.", role="model"), - Msg(name="Alice", content="Nice to meet you!", role="model"), - ], -) - -print(prompt) -``` - -```bash -[ - { - "role": "user", - "parts": [ - "You are a helpful assistant\n## Conversation History\nBob: Hi!\nAlice: Nice to meet you!" 
- ] - } -] -``` - - -### `ZhipuAIChatWrapper` - -`ZhipuAIChatWrapper`封装了ZhipuAi聊天API,它接受消息列表或字符串提示作为输入。与DashScope聊天API类似,如果我们传递消息列表,它必须遵守以下规则: - -- 必须有 role 和 content 字段,且 role 必须是 "user"、"system" 或 "assistant" 中的一个。 -- 至少有一个 user 消息。 - -当代理可能扮演多种不同角色并连续发言时,这些要求使得构建多代理对话变得困难。 -因此,我们决定在内置的`format`函数中将消息列表转换为字符串提示,并且封装在一条user信息中。 - -#### 提示的构建策略 - -如果第一条消息的 role 字段是 "system",它将被转换为带有 role 字段为 "system" 和 content 字段为系统消息的单个消息。其余的消息会被转化为带有 role 字段为 "user" 和 content 字段为对话历史的消息。 -下面展示了一个示例: - -```python -from agentscope.models import ZhipuAIChatWrapper -from agentscope.message import Msg - -model = ZhipuAIChatWrapper( - config_name="", # empty since we directly initialize the model wrapper - model_name="glm-4", - api_key="your api key", -) - -prompt = model.format( - Msg("system", "You're a helpful assistant", role="system"), # Msg object - [ # a list of Msg objects - Msg(name="Bob", content="Hi!", role="assistant"), - Msg(name="Alice", content="Nice to meet you!", role="assistant"), - ], -) -print(prompt) -``` - -```bash -[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "## Conversation History\nBob: Hi!\nAlice: Nice to meet you!"}, -] -``` - -[[返回顶端]](#206-prompt-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/207-monitor.md b/docs/sphinx_doc/zh_CN/source/tutorial/207-monitor.md deleted file mode 100644 index f923cf2f6..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/207-monitor.md +++ /dev/null @@ -1,126 +0,0 @@ -(207-monitor-zh)= - -# 监控 - -AgentScope 支持模型 API 使用情况的监控。 -用户可以通过在 `agentscope.init` 中设置 `use_monitor=False` 来禁用监控功能。 - -AgentScope 提供了 `agentscope.state_dict` 和 `agentscope.print_llm_usage` 两个函数,用于获取当前 AgentScope 状态和打印模型 API 的使用情况。 - -示例代码如下: - -```python - -import agentscope - -# ... 
- -# 获取当前监控状态 -state_dict = agentscope.state_dict() - -# 打印模型 API 的使用情况 -agentscope.print_llm_usage() -``` - -以下是一个 `state_dict` 的示例: - -```json -{ - "project": "zSZ0pO", - "name": "7def6u", - "run_id": "run_20240731-104527_7def6u", - "pid": 24727, - "timestamp": "2024-07-31 10:45:27", - "disable_saving": false, - "file": { - "save_log": false, - "save_code": false, - "save_api_invoke": false, - "base_dir": null, - "run_dir": "/xxx/runs/run_20240731-104527_7def6u", - "cache_dir": "/Users/xxx/.cache/agentscope" - }, - "model": { - "model_configs": {} - }, - "logger": { - "level": "INFO" - }, - "studio": { - "active": false, - "studio_url": null - }, - "monitor": { - "use_monitor": true, - "path_db": "/.../runs/run_20240731-104527_7def6u/agentscope.db" - } -} -``` - -- 调用 `agentscope.print_llm_usage` 时,AgentScope 将打印模型使用情况如下: - -```text -2024-08-05 15:21:54.889 | INFO | agentscope.manager._monitor:_print_table:117 - Text & Embedding Model: -2024-08-05 15:21:54.889 | INFO | agentscope.manager._monitor:_print_table:127 - | MODEL NAME | TIMES | PROMPT TOKENS | COMPLETION TOKENS | TOTAL TOKENS | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | gpt-4-turbo | 1 | 15 | 20 | 35 | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | gpt-4o | 1 | 43 | 34 | 77 | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | qwen-max | 2 | 129 | 172 | 301 | -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:117 - Image Model: -2024-08-05 15:21:54.890 | INFO | agentscope.manager._monitor:_print_table:127 - | MODEL NAME | RESOLUTION | TIMES | IMAGE COUNT | -2024-08-05 15:21:54.891 | INFO | agentscope.manager._monitor:_print_table:127 - | dall-e-3 | hd_1024*1024 | 1 | 2 | -2024-08-05 15:21:54.891 | INFO | agentscope.manager._monitor:_print_table:127 - | dall-e-3 | standard_1024*1024 | 2 | 7 | -2024-08-05 15:21:54.891 | INFO | agentscope.manager._monitor:_print_table:127 - | 
qwen-vl | 1024*1024 | 1 | 4 | -``` - -- 同时还可以获得 JSON 格式的模型 API 使用情况,如下所示: - -```python -# print(json.dumps(agentscope.print_llm_usage(), indent=4)) -{ - "text_and_embedding": [ - { - "model_name": "gpt-4-turbo", - "times": 1, - "prompt_tokens": 15, - "completion_tokens": 20, - "total_tokens": 35 - }, - { - "model_name": "gpt-4o", - "times": 1, - "prompt_tokens": 43, - "completion_tokens": 34, - "total_tokens": 77 - }, - { - "model_name": "qwen-max", - "times": 2, - "prompt_tokens": 129, - "completion_tokens": 172, - "total_tokens": 301 - } - ], - "image": [ - { - "model_name": "dall-e-3", - "resolution": "hd_1024*1024", - "times": 1, - "image_count": 2 - }, - { - "model_name": "dall-e-3", - "resolution": "standard_1024*1024", - "times": 2, - "image_count": 7 - }, - { - "model_name": "qwen-vl", - "resolution": "1024*1024", - "times": 1, - "image_count": 4 - } - ] -} -``` - -[[Return to the top]](#207-monitor-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/208-distribute.md b/docs/sphinx_doc/zh_CN/source/tutorial/208-distribute.md deleted file mode 100644 index a8999a4b5..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/208-distribute.md +++ /dev/null @@ -1,476 +0,0 @@ -(208-distribute-zh)= - -# 分布式 - -为了提供更好的性能以及支持更多的 Agent 同时运行,AgentScope 实现了基于 Actor 范式的 并行/分布式模式(后续简称为分布式模式)。该模式相比传统单进程模式具有以下特点: - -- **高性能**: 同一应用中的不同 Agent 以及其他服务可以运行不同进程甚至不同机器上,充分利用计算资源释放性能。 -- **自动并行化**: 基于 Actor 模式,每个 Agent都具有独立的状态,在编写应用时无需考虑调用顺序、资源竞争等问题,自动实现应用并行化。 -- **零迁移成本**: 代码与单机模式完全兼容,单机模式可运行的应用可以零成本直接迁移至并行/分布式模式。 - -本节将详细介绍 AgentScope 分布式的使用方法并阐述其原理。 - -(basic_usage-zh)= - -## 基础用法 - -分布式模式相比传统模式对运行代码几乎没有任何修改,仅需要在 Agent 初始化阶段调用 {func}`to_dist` 函数即可。 - -```python -# import some packages - -# init agentscope - -# 传统模式下的初始化 -# agent = Agent(...) - -# 分布式模式下的初始化 -agent = Agent(...).to_dist() - -x = Msg(...) 
-y = agent(x) -``` - -本节接下来将以一个网页检索的案例来展示具体如何使用 AgentScope 的分布式模式。 -为了突出 AgentScope 分布式模式所能带来的加速效果,这里使用了一个简单的自定义 `WebAgent`。 -该 Agent 会用 sleep 5 秒来模拟爬取网页并从中寻找问题答案的过程,样例中共有 5 个 Agent,每个 Agent 都会爬取一个网页并寻找问题答案。 - -传统模式与分布式模式的区别仅在于初始化阶段,即 `init_without_dist` 和 `init_with_dist`。 -`init_with_dist` 函数相较于 `init_without_dist` 的唯一区别在于额外调用了 `to_dist` 函数。 -在初始化完成后具体运行部分的代码完全相同,都是 `run` 函数,但两种模式的运行耗时却有较大差异。 - -```python -# 请不要使用 jupyter notebook 运行该代码 -# 请将代码拷贝到 `dist_main.py` 文件后使用 `python dist_main.py` 命令运行该代码 -# 运行该代码前请先安装 agentscope 的分布式版本 -# pip install agentscope[distribute] - -import time -import agentscope -from agentscope.agents import AgentBase -from agentscope.message import Msg - -class WebAgent(AgentBase): - - def __init__(self, name): - super().__init__(name) - - def get_answer(self, url: str, query: str): - """模拟爬取网页并从中寻找问题答案""" - time.sleep(5) - return f"Answer from {self.name}" - - def reply(self, x: dict = None) -> dict: - return Msg( - name=self.name, - role="assistant", - content=self.get_answer(x.content["url"], x.content["query"]) - ) - - -QUERY = "example query" -URLS = ["page_1", "page_2", "page_3", "page_4", "page_5"] - -def init_without_dist(): - return [WebAgent(f"W{i}") for i in range(len(URLS))] - - -def init_with_dist(): - return [WebAgent(f"W{i}").to_dist() for i in range(len(URLS))] - - -def run(agents): - start = time.time() - results = [] - for i, url in enumerate(URLS): - results.append(agents[i].reply( - Msg( - name="system", - role="system", - content={ - "url": url, - "query": QUERY - } - ) - )) - for result in results: - print(result.content) - end = time.time() - return end - start - - -if __name__ == "__main__": - agentscope.init() - start = time.time() - simple_agents = init_without_dist() - dist_agents = init_with_dist() - end = time.time() - print(f"初始化的耗时:{end - start}") - print(f"不使用分布式模式的耗时:{run(simple_agents)}") - print(f"使用分布式模式的耗时:{run(dist_agents)}") -``` - -上述代码的输出样例如下: - -```text -初始化的耗时:12.944042921066284 -[W0] Answer from
page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -不使用分布式模式的耗时:25.022241830825806 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -使用分布式模式的耗时:5.021369934082031 -``` - -从上述输出中可以观察到,在采用分布式模式后,运行速度有明显的提升(从 25 s 降低到 5 s)。 -上述样例也是 AgentScope 分布式模式最常见的用法,在不追求极致性能且 Agent 数量相对较少(例如不超过 10 个)的情况下,建议直接采用上述方法。 -而如果需要进一步优化性能,则需要对 AgentScope 分布式模式有更加深入的了解,下面的章节我们将具体介绍 AgentScope 分布式模式中的进阶使用方法。 - -## 进阶用法 - -本节将介绍 AgentScope 分布式模式的进阶使用方法,以进一步提升运行效率。在介绍进阶用法之前,我们需要先对 AgentScope 分布式模式的基本概念有一些初步认识。 - -### 基本概念 - -- **主进程 (Main Process)**: AgentScope 应用程序所在的进程被称为主进程。例如上一节例子中的 `run` 函数就是在主进程中运行的。每个 AgentScope 应用中只会有一个主进程。 -- **智能体服务器进程 (Agent Server Process)**: AgentScope 智能体服务器进程是分布式模式下 Agent 所运行的进程。例如上一节的例子中 `dist_agents` 中的所有 Agent 的本体实际上都运行于智能体服务器进程中。AgentScope 智能体服务器进程可以存在多个。智能体服务器进程可以运行在任意网络可达的机器上,并且每个智能体服务器进程中都可以同时运行多个 Agent。 - -- **子进程模式 (Child Mode)**: 在子进程模式下,智能体服务器进程是由主进程启动的子进程。例如上一节的例子中,`dist_agents` 中的每个 Agent 实际上都是主进程的子进程。该模式是 AgentScope 分布式的默认运行模式,即直接调用 `to_dist` 函数不给定任何参数时会默认使用该模式,[基础用法](#basic_usage-zh)部分采用的就是这种模式。 -- **独立进程模式 (Independent Mode)**: 在独立进程模式下,智能体进程相对主进程来说是独立的,需要预先在机器上启动智能体进程,并向 `to_dist` 函数传入一些特定的参数。如果需要实现 Agent 跨机器部署,必须使用该模式,另外如果对性能要求较高或是 Agent 数量较多也建议使用该模式。 - -### 使用独立进程模式 - -与子进程模式相比,独立进程模式能够避免子进程初始化的开销,从而消除运行初期的延迟,对于 Agent 数量较多的场景能够有效提升运行效率。 - -独立进程模式下,需要在机器上提前启动智能体服务器进程,并且向 `to_dist` 函数传入需要连接的智能体服务器进程的 `host` 以及 `port`。 -这里我们依旧使用基础用法部分的案例来演示,假设[基础用法](#basic_usage-zh)部分的代码文件为 `dist_main.py`,需要将如下代码保存为 `dist_server.py`。 - -```python -# 请不要使用 jupyter notebook 运行该代码 -# 请将代码拷贝到 `dist_server.py` 文件后使用 `python dist_server.py` 命令运行该代码, 目录结构如下: -# your_project_dir -# ├── dist_main.py -# └── dist_server.py -# 运行该代码前请先安装 agentscope 的分布式版本 -# pip install agentscope[distribute] - -import agentscope -from agentscope.server import RpcAgentServerLauncher -from dist_main import WebAgent - - -if __name__ == "__main__": -
agentscope.init( - # model_configs=... # 模型配置,如果不需要模型,可以不设置该参数 - ) - assistant_server_launcher = RpcAgentServerLauncher( - host="localhost", - port=12345, - custom_agent_classes=[WebAgent], - ) - assistant_server_launcher.launch(in_subprocess=False) - assistant_server_launcher.wait_until_terminate() -``` - -上述代码中,我们通过 `RpcAgentServerLauncher` 启动了一个智能体服务器进程,需要注意的是由于 `WebAgent` 不是 AgentScope 自带的 Agent 实现,需要将 `WebAgent` 添加到 `custom_agent_classes` ,才能在智能体服务器进程中创建该类型的 Agent。另外如果智能体服务器进程中需要使用模型 API,则需要在 `agentscope.init` 中配置对应的模型参数。 - -同时还需要将 `dist_main.py` 中的 `init_with_dist` 更新为下面的代码: - -```python -def init_with_dist(): - return [WebAgent(f"W{i}").to_dist(host="localhost", port=12345) for i in range(len(URLS))] -``` - -这里新版本的 `init_with_dist` 相比原版本新增了 `host` 与 `port` 两个参数,用于连接智能体服务器进程。 - -在代码修改完成后,先在一个命令行窗口中运行 `dist_server.py` 文件,等待启动成功后在另一个命令行窗口运行 `dist_main.py` 文件,运行的时候会看到如下输出: - -```text -初始化的耗时:0.005397319793701172 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -不使用分布式模式的耗时:25.023009061813354 -[W0] Answer from page_1 -[W1] Answer from page_2 -[W2] Answer from page_3 -[W3] Answer from page_4 -[W4] Answer from page_5 -使用分布式模式的耗时:5.021481990814209 -``` - -此时的 `dist_main.py` 初始化的耗时将会明显减少,例如这里的耗时仅为 0.005 s。 - -### 避免重复初始化 - -上面的代码中都是在一个已经初始化完成的 Agent 上调用 `to_dist` 函数。 -`to_dist` 本质上是将原 Agent 克隆到智能体服务器进程中,并在主进程中保留一个 {class}`RpcObject` 作为原 Agent 的代理,对该 `RpcObject`的调用都会转发到智能体服务器进程中的对应 Agent 上。 - -这样的流程存在一个潜在问题,即原 Agent 被初始化了两次,一次是在主进程中,一次是在智能体服务器进程中,并且这两次初始化是依次执行的,无法通过并行加速。对于初始化成本比较低的 Agent,直接调用 `to_dist` 函数不会对性能产生明显影响,但是对于初始化成本较高的 Agent,则需要尽量避免重复初始化行为,为此 AgentScope 分布式模式提供了另一种分布式模式的初始化方法,即直接在任意 Agent 的初始化函数内部传入 `to_dist` 参数,例如下面的代码就是对 `dist_main.py` 的`init_with_dist` 函数的修改。 - -- 对于子进程模式,只需要在初始化函数中传入 `to_dist=True` 即可。 - - ```python - def init_with_dist(): - return [WebAgent(f"W{i}", to_dist=True) for i in range(len(URLS))] - ``` - -- 对于独立进程模式,则需要将原来传入`to_dist`函数的参数以字典的形式传入到 `to_dist` 域中。 - - 
```python - def init_with_dist(): - return [WebAgent(f"W{i}", to_dist={"host": "localhost", "port": "12345"}) for i in range(len(URLS))] - ``` - -```{note} -一些 IDE 的自动补全功能可能提示 `to_dist` 参数不存在,但实际运行时并不会报错。 -另外,如果已经在初始化参数中传入了 `to_dist`,则不能再调用 `to_dist` 方法。 -``` - -## 开发者指南 - -```{note} -本节主要面向基于 AgentScope 分布式模式开发新功能的开发者,需要开发者有一定的分布式编程基础,对进程、线程、同步、异步、gRPC、Python 元类以及GIL等概念有一定的理解。但即使没有上述基础,通过阅读本节也能学到 AgentScope 分布式模式的基本原理以及一些高级用法。 -``` - -AgentScope 分布式模式的主要逻辑是: - -**将原本运行在任意 Python 进程中的对象通过 `to_dist` 函数或是初始化参数转移到 RPC 服务器中运行,并在原进程中保留一个 `RpcObject` 作为代理,任何 `RpcObject` 上的函数调用或是属性访问都会转发到 RPC 服务器中的对象上,并且在调用函数时可以自行决定是使用同步调用还是异步调用。** - -下图展示了`to_dist`初始化、同步函数调用以及异步函数调用的交互流程: - -```{mermaid} -sequenceDiagram - User -->> Process: initialize - Process -->> RPC Server: to_dist - User -->> Process: sync function call - Process -->> RPC Server: sync function call - RPC Server -->> RPC Server: calculate result - RPC Server -->> Process: sync result - Process -->> User: sync result - User -->> Process: async function call - Process -->> RPC Server: async function call - RPC Server -->> RPC Server: calculate result - User -->> Process: get async result - Process -->> RPC Server: get async result - RPC Server -->> Process: async result - Process -->> User: async result -``` - -从上图可以观察到 AgentScope 分布式模式本质是一个 Client-Server 架构,用户编写的智能体应用(Process)作为Client 端,而智能体服务器进程(RPC Server)作为 Server 端。分布式模式下 Client 端将本地的智能体发送到 Server 端运行,并将本地的函数调用以及属性访问转发到 Server 端,而 Server 端则负责接收 Client 端发送的对象,并接收 Client 端发来的各种调用请求。 - -```{note} -AgentScope 分布式模式中 Client 与 Server 通信基于 gRPC 实现,对发送消息的大小有严格的限制,默认情况下单条消息不能超过 32 MB。可以通过修改 `src/agentscope/constants.py` 中的 `_DEFAULT_RPC_OPTIONS` 参数来进一步扩大该值。 -``` - -接下来将分别介绍 Client 端以及 Server 端的实现。 - -### Client 端 - -Client 主要包含 `RpcMeta`、`RpcObject` 两个主要类,其中 `RpcMeta` 负责将本地对象发送到 Server 端运行,而 `RpcObject` 则负责后续的各种请求调用的转发。 - -#### `RpcMeta` - -{class}`RpcMeta` 类是一个元类(Meta class),会自动向继承自己的子类添加 `to_dist` 方法以及 `to_dist` 初始化参数 (因此 IDE 可能会提示 `to_dist` 参数不存在,但实际运行时并不会报错),其实现位于 
`src/agentscope/rpc/rpc_meta.py`。 - -在一个已经初始化完成的对象上调用 `to_dist` 方法会将原对象的初始化参数打包发送到 智能体服务器进程 中,并在智能体服务器进程中重新初始化该对象,而在主进程中会返回一个 `RpcObject` 替代原有的对象。 - -由于是使用初始化参数来重建原有对象,无法维持创建后的状态变化,因此建议在初始化的同时立即调用 `to_dist` 方法,或者直接在原对象的初始化函数中传入 `to_dist` 参数。 - -由于 `to_dist` 是 `RpcMeta` 自动向子类添加的方法,因此不仅是 Agent 类,任何继承自 `RpcMeta` 的类都可以使用 `to_dist` 方法。 - -`RpcMeta` 除了提供 `to_dist` 方法外还会记录原对象上能够被调用的方法以及属性,以方便在 `RpcObject` 中调用。默认情况下只会记录原对象上的公有方法,并且使用同步调用 (调用时会阻塞调用发起方,直到原对象上的方法执行完毕)。如果需要使用异步调用需要在方法声明上添加 `async_func` 装饰器。 - -#### `async_func` 和 `AsyncResult` - -{func}`async_func` 装饰器的实现位于 `src/agentscope/rpc/rpc_meta.py`。`AgentBase` 及其所有子类的 `__call__` 以及 `reply` 方法都被标记为了 `async_func` 以避免阻塞。 - -与 `async_func` 相对的还有 {func}`sync_func` 装饰器,用于标识同步方法。但由于同步方法为默认情况,因此一般不需要显式标注。 - -如下是一个简单的示例,这里声明了一个 `Example` 类,其中 `sync_method` 是同步方法,`async_method_basic` 以及 `async_method_composite` 被标记为了异步方法,`_protected_method` 是私有方法。 - -```python -import time -from agentscope.rpc import RpcMeta, async_func - - -class Example(metaclass=RpcMeta): - - # @sync_func # 默认即为 sync_func,可以不添加 - def sync_method(self) -> str: - # 同步方法,调用者会被阻塞 1 s - time.sleep(1) - return "sync" - - @async_func - def async_method_basic(self) -> str: - # 异步方法,调用者不会被阻塞,可以继续执行直到尝试获取结果 - time.sleep(1) - # 返回一个基本类型 - return "async" - - @async_func - def async_method_composite(self) -> dict: - # 异步方法 - time.sleep(1) - # 返回一个字典 - return {"a": 1, "b": 2, "c": "hello world",} - - def _protected_method(self) -> str: - # 不是公有方法,rpc object 无法调用该方法 - time.sleep(1) - return "protected" - - -if __name__ == "__main__": - example = Example(to_dist=True) - # 访问 protected 方法会引发未定义行为,请避免使用 - # protected_result = example._protected_method() - t1 = time.time() - sync_result = example.sync_method() - assert sync_result == "sync" - t2 = time.time() - print(f"Sync func cost: {t2 - t1} s") - t3 = time.time() - async_basic = example.async_method_basic() - async_composite = example.async_method_composite() - t4 = time.time() - print(f"Async func cost: {t4 - t3} s") - #
基本类型需要在返回值上调用 result 方法获取异步执行结果 - assert async_basic.result() == "async" - # 复合类型在访问所需要的域时自动更新异步执行结果 - assert async_composite["a"] == 1 - assert async_composite["b"] == 2 - assert async_composite["c"] == "hello world" -``` - -上述代码的运行结果样例如下,可以观察到调用 `async_method` 的耗时比 `sync_method` 短很多,这是因为 `async_method` 是异步方法,不会阻塞调用发起方,而 `sync_method` 是同步方法,因此会阻塞调用发起方。 - -```text -Sync func cost: 1.0073761940002441 s -Async func cost: 0.0003597736358642578 s -``` - -上述代码中 `async_method_basic` 以及 `async_method_complex` 返回的是 {class}`AsyncResult` 对象,该对象可以通过 `result` 方法获取异步执行结果。为了让异步与同步调用的接口尽可能统一,如果 `AsyncResult` 所代表的结果是复合类型,就不再需要手动调用 `result` 方法,在访问内部属性时会自动调用 `result` 更新执行结果 (如上述代码中 `async_composite` 所示)。 - -#### `RpcObject` - -{class}`RpcObject` 的实现位于 `src/agentscope/rpc/rpc_object.py` 中。 -`RpcObject` 是一个代理,其内部并不包含原对象的任何属性值或是方法,只记录了原对象所在的智能体服务器的地址以及该对象的 `id`,通过这些参数,`RpcObject` 可以通过网络连接原对象,从而实现对原对象的调用。 - -当用户调用 `RpcObject` 上的方法或访问属性时,`RpcObject` 会通过 `__getattr__` 方法将请求转发到位于智能体服务器进程的原对象上。对于调用同步方法 (`@sync_func`) 或是访问属性值的情况,`RpcObject` 会阻塞调用发起方,直到原对象上的方法执行完毕,并返回执行结果。而异步方法 (`@async_func`) 则会立即返回一个 {class}`AsyncResult` 对象,如果主进程不去访问该对象的具体值就可以无阻塞地继续运行,而如果需要获取执行结果,则需要调用 `AsyncResult` 对象上的 `result` 方法,如果此时结果还没有返回,`result` 方法会阻塞调用发起方,直到结果返回。 - -```{note} -`RpcObject` 在初始化时如果发现没有提供 `host` 和 `port` 参数 (即子进程模式),就会去启动一个新的智能体服务器进程,并在该进程上重新创建原对象,而启动新的智能体服务器进程相对缓慢,这也是导致子进程模式初始化时间较长的主要原因。 -而如果提供了 `host` 和 `port` 参数 (即独立进程模式),`RpcObject` 就会直接连接该服务器并重新创建原对象,避免了启动新进程的开销。 -``` - -### Server 端 - -Server 端主要基于 gRPC 实现,主要包含 `AgentServerServicer` 和 `RpcAgentServerLauncher` 这两个类。 - -#### `AgentServerLauncher` - -`AgentServerLauncher` 的实现位于 `src/agentscope/server/launcher.py`,用于启动 gRPC Server 进程。 -具体来说,为了保证启动的 Server 进程中能够正确地重新初始化 Client 端发来的对象并正确调用模型API服务,需要在启动 Server 时注册在运行中可能用到的所有 `RpcMeta` 的子类,并且正确设置模型配置。具体来说有两种启动方法,分别是通过代码启动,和通过命令行指令启动。 - -- 通过代码启动的具体方法如下,需要指定 `host` 和 `port`,以及 `custom_agent_classes`,并且需要在调用 `agentscope.init` 时传入需要使用的模型配置。这里假设有 `AgentA`,`AgentB`,`AgentC` 这三个自定义类需要被注册,并且 
`AgentA`,`AgentB`,`AgentC` 这三个类都位于 `myagents.py` 文件中且都是 `AgentBase` 的子类。 - - ```python - import agentscope - from agentscope.server import RpcAgentServerLauncher - from myagents import AgentA, AgentB, AgentC - - - MODEL_CONFIGS = {} - - HOST = "localhost" - PORT = 12345 - CUSTOM_CLASSES = [AgentA, AgentB, AgentC] - - if __name__ == "__main__": - agentscope.init( - model_configs=MODEL_CONFIGS, - ) - launcher = RpcAgentServerLauncher( - host=HOST, - port=PORT, - custom_agent_classes=CUSTOM_CLASSES, - ) - launcher.launch(in_subprocess=False) - launcher.wait_until_terminate() - ``` - -- 通过命令行启动的具体方法如下,除了需要指定 `host` 和 `port` 外,还需要指定 `model_config_path` 和 `agent_dir`,分别对应模型配置文件路径和自定义 Agent 类所在的目录。在安装 `agentscope` 时默认会安装 `as_server` 指令,所以可以直接在命令行中使用该指令。 - - ```shell - as_server start --host localhost --port 12345 --model-config-path model_config_path --agent-dir parent_dir_of_myagents.py - ``` - -```{warning} -`AgentServerLauncher` 会加载并执行自定义的 Python 对象,在使用前请仔细检查被加载的对象,如果其中包含恶意代码可能会对系统造成严重损害。 -`AgentServerLauncher` 类还存在一个 `local_mode` 参数用于表示是否只允许本地访问,默认为 `True`,如果需要允许其他机器访问,则需要设置为 `False`。为了避免网络攻击,建议仅在可信的网络环境下使用。 -``` - -#### `AgentServerServicer` - -`AgentServerServicer` 的实现位于 `src/agentscope/server/servicer.py`,是 gRPC 服务的实现类,负责具体接收并处理 Client 端发来的各种请求。 - -其中的 `create_agent` 方法会在 Client 端对某个 `RpcMeta` 的子类对象使用 `to_dist` 时被调用,并在 server 内部重新创建原对象,并以 `id` 为键将对象保存在 `agent_pool` 域中。 - -而 `call_agent_func` 方法会在 Client 端调用 `RpcObject` 对象上的方法或属性时被调用,输入参数中包含了被调用对象的 `id` 以及被调用方法的名称,具体的调用流程有一定差异。对于同步方法以及属性访问,`call_agent_func` 会直接从 `agent_pool` 取出对象并调用对应方法或属性,并在返回结果前阻塞调用发起方。对于异步方法,`call_agent_func` 会将输入参数打包放入任务队列中,并立即返回该任务的 `task_id` 从而避免阻塞调用发起方。 - -`AgentServerServicer` 内部包含了一个执行器池 (`executor`) 用于自动执行任务队列中提交的任务 (`_process_task`),并执行将结果放入 `result_pool` 中,`AsyncResult` 的 `result` 方法会尝试从 `result_pool` 中取出对应任务的结果,如果任务结果不存在则会阻塞调用发起方,直到结果返回。 - -##### `executor` - -executor 是一个线程池 (`concurrent.futures.ThreadPoolExecutor`),其中的线程数量由 `capacity` 参数决定,`capacity` 的设置对运行效率的影响巨大,需要根据具体任务来针对性设置。 -为了让 
Server 中的各个 Agent 能够并发执行,最好保证 `capacity` 大于 `AgentServerServicer` 中同时运行的 Agent 的数量,否则可能会导致运行时间成倍增加,甚至在一些特殊场景 (多个 agent 之间进行递归调用) 中出现死锁现象。 - -`capacity` 参数可以在 `as_server` 命令中通过 `--capacity` 指定,或是直接在 `RpcAgentServerLauncher` 初始化时指定。 - -```python -# ... -launcher = RpcAgentServerLauncher( - host="localhost", - port=12345, - custom_agent_classes=[], - capacity=10, -) -``` - -```shell -as_server start --host localhost --port 12345 --model-config-path model_config_path --agent-dir parent_dir_of_myagents --capacity 10 -``` - -##### `result_pool` - -`ResultPool` 的实现位于 `src/agentscope/server/async_result_pool.py`,用于管理异步方法的执行结果,目前有两种实现分别为 `local` 和 `redis`。其中 `local` 基于 Python 的字典类型 (`dict`) 实现,而 `redis` 则是基于 Redis 实现。为了避免结果占用过多内存两种实现都包含了过期自动删除机制,其中 `local` 可以设置超时删除 (`max_expire_time`) 或超过条数删除 (`max_len`),而 `redis` 则仅支持超时删除 (`max_expire_time`)。 -在启动 `AgentServerLauncher` 时可以通过传入 `pool_type` 来指定使用哪种实现,默认为`local`。 -如果指定为 `redis` 则还必须传入 `redis_url`,如下是代码以及命令行的使用案例。 - -```python -# ... -launcher = RpcAgentServerLauncher( - host="localhost", - port=12345, - custom_agent_classes=[], - pool_type="redis", - redis_url="redis://localhost:6379", - max_expire_time=7200, # 2 hours -) -``` - -```shell -as_server start --host localhost --port 12345 --model-config-path model_config_path --agent-dir parent_dir_of_myagents --pool-type redis --redis-url redis://localhost:6379 --max-expire-time 7200 -``` - -[[回到顶部]](#208-distribute-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/209-gui.md b/docs/sphinx_doc/zh_CN/source/tutorial/209-gui.md deleted file mode 100644 index 20c945429..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/209-gui.md +++ /dev/null @@ -1,206 +0,0 @@ -(209-gui-zh)= - -# AgentScope Studio - -AgentScope Studio 是一个开源的 Web UI 工具包,用于构建和监控多智能体应用程序。它提供以下功能: - -- **Dashboard**:一个用户友好的界面,可以在其中监视正在运行的应用程序,并查看运行历史。 -- **Workstation**:一个强大的界面,可通过**拖拽**的方式构建多智能体应用程序。 -- **Server Manager**:一个简单易用的监控与管理工具,用于管理大规模分布式的多智能体应用程序。 -- **Gallery**:即将推出! 
- -## 启动 AgentScope Studio - -要启动 Studio,请首先确保已安装了最新版本的 AgentScope。然后,可以通过运行以下 Python 代码: - -```python -import agentscope - -agentscope.studio.init() -``` - -或者,可以在终端中运行以下命令: - -```bash -as_studio -``` - -之后,可以在 `http://127.0.0.1:5000` 访问 AgentScope Studio。 - -当然,AgentScope Studio的 IP 地址和端口都可以更改,并且可以通过以下方式引入应用的运行记录: - -```python -import agentscope - -agentscope.studio.init( - host="127.0.0.1", # AgentScope Studio 的 IP 地址 - port=5000, # AgentScope Studio 的端口号 - run_dirs = [ # 您的运行历史目录 - "xxx/xxx/runs", - "xxx/xxx/runs" - ] -) -``` - -## Dashboard - -Dashboard 是一个 Web 界面,用于监视正在运行的应用程序,并查看运行历史。 - -## 注意 - -目前,Dashboard 存在以下限制,我们正在努力改进。欢迎任何反馈、贡献或建议! - -- 运行的应用程序和 AgentScope Studio 必须在同一台机器上运行,以保持 URL 和路径的一致性。如果需要在其它机器上访问 AgentScope Studio,可以尝试通过在远程机器上运行以下命令将端口转发到远程机器: - - ```bash - # 假设 AgentScope 在 {as_host}:{as_port} 上运行,远程机器的端口为 {remote_machine_port} - ssh -L {remote_machine_port}:{as_host}:{as_port} [{user_name}@]{as_host} - ``` - -- 对于分布式应用程序,支持单机多进程模式,但尚不支持多机多进程模式。 - -### 注册应用程序 - -启动 AgentScope Studio 后,可以通过指定 `agentscope.init()` 函数中的 `studio_url` 参数来注册应用程序: - -```python -import agentscope - -agentscope.init( - # ... - project="xxx", - name="xxx", - studio_url="http://127.0.0.1:5000" # AgentScope Studio 的 URL -) -``` - -注册后,可以在 Dashboard 中查看正在运行的应用程序。为了区分不同的应用程序,可以指定应用程序的 `project` 和 `name`。 - -> 注意:一旦注册了应用程序,`agentscope.agents.UserAgent` 中的用户输入就会转移到 AgentScope Studio 的 Dashboard 中,您可以在 Dashboard 中输入。 - -### 引入运行历史 - -在 AgentScope 中,运行历史默认会保存在 `./runs` 目录下。为了引入运行历史,可以在 `agentscope.studio.init()` 函数中指定 `run_dirs` 参数: - -```python -import agentscope - -agentscope.studio.init( - run_dirs = ["xxx/runs",] -) -``` - -## Workstation - -Workstation 是为零代码用户设计的,可以通过**拖拽**的方式构建多智能体应用程序。 - -> 注意:Workstation 仍处于快速迭代阶段,界面和功能可能会有所变化。欢迎任何反馈、贡献或建议! - -### 快速使用 - -AgentScope Studio中,通过点击 workstation 图标进入 Workstation 界面。 -它由侧边栏、工作区和顶部工具栏组成。 - -- **侧边栏**:提供预构建的示例,帮助开发者熟悉工作站,并提供可拖动的组件来构建应用程序。 -- **工作区**:主要工作区,可以在其中拖放组件来构建应用程序。 -- **顶部工具栏**:包含导出、加载、检查和运行等功能。 - -

-agentscope-logo -

- -#### 内置样例 - -对于初学者,建议从预构建的示例开始,可以直接点击示例以将其导入到中央工作区。或者,为了获得更有结构化的学习体验,您可以选择跟随每个示例链接的教程。这些教程将逐步引导您如何在 AgentScope Workstation 上构建每个多智能体应用程序。 - -#### 构建应用程序 - -要构建应用程序,请按照以下步骤操作: - -- **选择和拖动组件**:从侧边栏中选择您想要的组件,然后将其拖放到中央工作区。 -- **连接节点**:大多数节点都有输入和输出点。单击一个组件的输出点,然后将其拖动到另一个组件的输入点,以创建消息流管道。这个过程允许不同的节点传递消息。 -- **配置节点**:将节点拖放到工作区后,单击任何节点以填写其配置设置。可以自定义提示、参数和其他属性。 - -#### 运行应用程序 - -构建应用程序后,单击“运行”按钮。 -在运行之前,Workstation 将检查您的应用程序是否存在任何错误。如果有任何错误,您将被提示在继续之前纠正它们。 -之后,应用程序将在与 AgentScope Studio 相同的 Python 环境中执行,并且可以在 Dashboard 中找到它。 - -#### 导入/导出应用程序 - -AgentScope Workstation 支持导入和导出应用程序。 -单击“导出 HTML”或“导出 Python”按钮,以生成可以分发给社区或本地保存的代码。 -如果要将导出的代码转换为 Python,请按以下步骤将 JSON 配置编译为 Python 代码: - -```bash -as_workflow config.json --compile ${YOUR_PYTHON_SCRIPT_NAME}.py -``` - -需要进一步编辑应用程序,只需单击“导入 HTML”按钮,将之前导出的 HTML 代码上传回 AgentScope Workstation。 - -#### 检查应用程序 - -构建应用程序后,可以单击“检查”按钮来验证应用程序结构的正确性。将执行以下检查规则,不用担心这些规则过于复杂,Workstation 将会自动执行检查并给出提示。 - -- Model 和 Agent 的存在:每个应用程序必须包含至少一个 model 节点和一个 agent 节点。 -- 单连接策略:每个组件的输入不应该有多个连接。 -- 必填字段验证:所有必填字段必须填充,以确保每个节点具有正确运行所需的参数。 -- 一致的配置命名:Agent 节点使用的“Model config name”必须对应于 Model 节点中定义的“Config Name”。 -- 节点嵌套正确:ReActAgent 等节点应仅包含工具节点。类似地,IfElsePipeline 等 Pipeline 节点应包含正确数量的元素(不超过 2 个),而 ForLoopPipeline、WhileLoopPipeline 和 MsgHub 应遵循一个元素的规则(必须是 SequentialPipeline 作为子节点)。 - -## Server Manager - -> 阅读本节内容需要先了解 AgentScope [分布式](#208-distribute-zh) 的基本概念及用法。 - -Server Manager 是一个用于监控和管理 AgentScope 智能体服务器进程(Server)以及大规模分布式应用的图形化界面。 - -### 注册 Server 进程 - -在初始化 `RpcAgentServerLauncher` 时传入 `studio_url` 参数即可实现注册。 - -```python -# import some packages -server = RpcAgentServerLauncher( - # ... - studio_url="http://studio_ip:studio_port", # connect to AgentScope Studio -) -``` - -更具体的注册方法请参考 [分布式](#208-distribute-zh) 中 *连接 AgentScope Studio* 部分。 - -### 管理 Server 进程 - -从 AgentScope Studio 主页面或侧边栏中的 Server Manager 按钮即可进入 Server Manager 页面。 -当前 Server Manager 页面由 Servers 列表, Agents 列表, Memory 列表三个部分构成。 - -

-agentscope-manager -

- -#### Servers 列表 - -注册到 Studio 的智能体服务器进程(Server)都会显示在 Server Manager 页面的 Servers 列表中,列表中不仅会显示每个 Server 的 `ID`, `Hostname`, `Port`, `Created Time`,还会显示每个 Server 的状态以及计算资源使用情况,包括 `Status`, `CPU Usage`, `Memory Usage`。 - -其中 `Status` 有以下几种: - - `running`:表示 Server 正在运行。 - - `dead`:表示 Server 已停止运行。 - - `unknown`:表示目前无法正常访问 Studio 服务。 - -只有在 `running` 状态的 Server 才会显示 CPU 和 Memory 的使用情况。用户可以点击 Servers 栏左边的刷新按钮来刷新 Servers 列表,同时也能够通过点击 Servers 栏右侧的删除按钮来一键删除所有已经处于 `dead` 状态的 Server。 - -Servers 列表每行的最后一列都提供了删除按钮,用于关闭并删除 Server,需要注意的是该操作是无法恢复的,因此需要谨慎使用。 - -#### Agents 列表 - -在点击任意处于 `running` 状态的 Server 行后,会在页面中展开 Agents 列表,该列表中会显示该 Server 下所有 Agent,列表中会显示每个 Agent 的 `ID`, `Name`, `Class`, `System Prompt` 以及 `Model`。 - -用户同样可以通过 Agents 列表栏左侧的刷新按钮来刷新 Agents 列表。并且用户也可以通过每个 Agent 行最右侧的删除按钮来删除该 Agent,并通过 Agents 列表栏右侧的删除按钮来批量删除 Server 中所有的 Agent。这里的删除操作都是不可恢复的,因此需要谨慎使用。 - -#### Memory 列表 - -在点击任意 Agent 行后,会在页面中展开 Memory 列表,该列表中会显示该 Agent 的 Memory 中的所有消息,每条消息会在左侧显示其 `Name` 和 `Role` 属性值,在点击后会在列表右侧显示该消息的具体内容。 -这里同样可以点击 Memory 列表栏左侧的刷新按钮来刷新当前的 Memory 列表。 - -[[回到顶部]](#209-gui-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/209-prompt_opt.md b/docs/sphinx_doc/zh_CN/source/tutorial/209-prompt_opt.md deleted file mode 100644 index 7b7cd72a1..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/209-prompt_opt.md +++ /dev/null @@ -1,419 +0,0 @@ -(209-prompt-opt-zh)= - -# 系统提示优化 - -AgentScope实现了对智能体System Prompt进行优化的模块。 - -## 背景 - -在智能体系统中,System Prompt的设计对于产生高质量的智能体响应至关重要。System Prompt向智能体提供了执行任务的环境、角色、能力和约束等背景描述。然而,优化System Prompt的过程通常充满挑战,这主要是由于以下几点: - -1. **针对性**:一个良好的 System Prompt 应该针对性强,能够清晰地引导智能体在特定任务中更好地表现其能力和限制。 -2. **合理性**:为智能体定制的 System Prompt 应该合适且逻辑清晰,以保证智能体的响应不偏离预定行为。 -3. **多样性**:智能体可能需要参与多种场景的任务,这就要求 System Prompt 具备灵活调整以适应各种不同背景的能力。 -4.
**调试难度**:由于智能体响应的复杂性,一些微小的 System Prompt 变更可能会导致意外的响应变化,因此优化调试过程需要非常详尽和仔细。 - -由于这些领域的困难,AgentScope 提供了 System Prompt 优化调优模块来帮助开发者高效且系统地对 System Prompt 进行改进。借助这些模块可以方便用户对自己 Agent 的 System Prompt 进行调试优化,提升 System Prompt 的有效性。 -具体包括: - -- **System Prompt Generator**: 根据用户的需求生成对应的 system prompt -- **System Prompt Comparer**: 在不同的查询或者对话过程中比较不同的 system prompt 的效果 -- **System Prompt Optimizer**: 根据对话历史进行反思和总结,从而进一步提升 system prompt - -## 目录 - -- [System Prompt Generator](#system-prompt-generator) - - [初始化](#初始化) - - [生成 System Prompt](#生成-system-prompt) - - [使用 In Context Learning 生成](#使用-in-context-learning-生成) -- [System Prompt Comparer](#system-prompt-comparer) - - [初始化](#初始化-1) -- [System Prompt Optimizer](#system-prompt-optimizer) - - -## System Prompt Generator - -System prompt generator 使用一个 meta prompt 来引导 LLM 根据用户输入生成对应的 system prompt,并允许开发者使用内置或自己的样例进行 In Context Learning (ICL)。 - -具体包括 `EnglishSystemPromptGenerator` 和 `ChineseSystemPromptGenerator` 两个模块,分别用于英文和中文的系统提示生成。它们唯一的区别在于内置的 prompt 语言不同,其他功能完全一致。 -下面以 `ChineseSystemPromptGenerator` 为例,介绍如何使用 system prompt generator。 - -### 初始化 - -为了初始化生成器,首先需要在 `agentscope.init` 函数中注册模型配置。 - -```python -from agentscope.prompt import EnglishSystemPromptGenerator -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -prompt_generator = EnglishSystemPromptGenerator( - model_config_name="my-gpt-4" -) -``` - -生成器将使用内置的 meta prompt 来引导 LLM 生成 system prompt。 -开发者也可以使用自己的 meta prompt,如下所示: - -```python -from agentscope.prompt import EnglishSystemPromptGenerator - -your_meta_prompt = "You are an expert prompt engineer adept at writing and optimizing system prompts. Your task is to ..." 
- -prompt_gen_method = EnglishSystemPromptGenerator( - model_config_name="my-gpt-4", - meta_prompt=your_meta_prompt -) -``` - -欢迎开发者尝试不同的优化方法。AgentScope 提供了相应的 `SystemPromptGeneratorBase` 模块,用以实现自己的优化模块。 - -```python -from agentscope.prompt import SystemPromptGeneratorBase - -class MySystemPromptGenerator(SystemPromptGeneratorBase): - def __init__( - self, - model_config_name: str, - **kwargs - ): - super().__init__( - model_config_name=model_config_name, - **kwargs - ) -``` - -### 生成 System Prompt - -调用 `generate` 函数生成 system prompt,这里的输入可以是一个需求,或者是想要优化的 system prompt。 - -```python -from agentscope.prompt import ChineseSystemPromptGenerator -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -prompt_generator = ChineseSystemPromptGenerator( - model_config_name="my-gpt-4" -) - -generated_system_prompt = prompt_generator.generate( - user_input="生成一个小红书营销专家的系统提示,专门负责推销书籍。" -) - -print(generated_system_prompt) -``` - -执行上述代码后,可以获得如下的 system prompt: - -``` -你是一个小红书营销专家AI,你的主要任务是推销各类书籍。你拥有丰富的营销策略知识和对小红书用户群体的深入理解,能够创造性地进行书籍推广。你的技能包括但不限于:制定营销计划,写吸引人的广告文案,分析用户反馈,以及对营销效果进行评估和优化。你无法直接进行实时搜索或交互,但可以利用你的知识库和经验来提供最佳的营销策略。你的目标是提高书籍的销售量和提升品牌形象。 -``` - -看起来这个 system prompt 已经有一个雏形了,但是还有很多地方可以优化。接下来我们将介绍如何使用 In Context Learning (ICL) 来优化 system prompt。 - -### 使用 In Context Learning 生成 - -AgentScope 的 system prompt generator 模块支持在系统提示生成中使用 In Context Learning。 -它内置了一些样例,并且允许用户提供自己的样例来优化系统提示。 - -为了使用样例,AgentScope 提供了以下参数: - -- `example_num`: 附加到 meta prompt 的样例数量,默认为 0 -- `example_selection_strategy`: 选择样例的策略,可选 "random" 和 "similarity"。 -- `example_list`: 一个样例的列表,其中每个样例必须是一个包含 "user_prompt" 和 "opt_prompt" 键的字典。如果未指定,则将使用内置的样例列表。 - -```python -from agentscope.prompt import ChineseSystemPromptGenerator - -generator = ChineseSystemPromptGenerator( - model_config_name="{your_config_name}", - - example_num=3, - example_selection_strategy="random", - example_list= [ # 
或者可以使用内置的样例列表 - { - "user_prompt": "生成一个 ...", - "opt_prompt": "你是一个AI助手 ..." - }, - # ... - ], -) -``` - -注意,如果选择 `"similarity"` 作为样例选择策略,可以在 `embed_model_config_name` 或 `local_embedding_model` 参数中指定一个 embedding 模型。 -它们的区别在于: - -- `embed_model_config_name`: 首先在 `agentscope.init` 中注册 embedding 模型,并在此参数中指定模型配置名称。 -- `local_embedding_model`:或者,可以使用 `sentence_transformers.SentenceTransformer` 库支持的本地小型嵌入模型。 - -如果上述两个参数都没有指定,AgentScope 将默认使用 `"sentence-transformers/all-mpnet-base-v2"` 模型,该模型足够小,可以在 CPU 上运行。 -一个简单利用 In Context Learning 的示例如下: - -```python -from agentscope.prompt import ChineseSystemPromptGenerator -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -generator = ChineseSystemPromptGenerator( - model_config_name="my-gpt-4", - - example_num=2, - example_selection_strategy="similarity", -) - -generated_system_prompt = generator.generate( - user_input="生成一个小红书营销专家的系统提示,专门负责推销书籍。" -) - -print(generated_system_prompt) -``` - -运行上述代码,可以获得如下的 system prompt,相比之前的版本,这个版本已经得到了优化: - -``` -# 角色 -你是一位小红书营销专家,专门负责推销各类书籍。你对市场趋势有着敏锐的洞察力,能够精准把握读者需求,创新性地推广书籍。 - -## 技能 -### 技能1:书籍推销 -- 根据书籍的特点和读者的需求,制定并执行有效的营销策略。 -- 创意制作吸引人的内容,如书籍预告、作者访谈、读者评价等,以提升书籍的曝光度和销售量。 - -### 技能2:市场分析 -- 对小红书平台的用户行为和市场趋势进行深入研究,以便更好地推销书籍。 -- 根据分析结果,调整和优化营销策略。 - -### 技能3:读者互动 -- 在小红书平台上与读者进行有效互动,收集和回应他们对书籍的反馈。 -- 根据读者反馈,及时调整营销策略,提高书籍的销售效果。 - -## 限制: -- 只在小红书平台上进行书籍的推销工作。 -- 遵守小红书的社区规则和营销准则,尊重读者的意见和反馈。 -- 不能对书籍的销售结果做出过于乐观或过于悲观的预测。 -``` - -> Note: -> -> 1. 样例的 embedding 将会被缓存到 `~/.cache/agentscope/`,这样未来针对相同的样例和相同的模型情况下,不会重复计算 embedding。 -> -> 2. 
`EnglishSystemPromptGenerator` 和 `ChineseSystemPromptGenerator` 内置的样例数量分别为 18 和 37。如果使用在线 embedding API 服务,请注意成本。 - -## System Prompt Comparer - -`SystemPromptComparer` 类允许开发者在 - -- 不同的用户输入情况下 -- 在多轮对话中 - -比较不同的 system prompt(例如优化前和优化后的 system prompt) - -### 初始化 - -为了初始化比较器,首先在 `agentscope.init` 函数中注册模型配置,然后用需要比较的 system prompt 实例化 `SystemPromptComparer` 对象。 -让我们尝试一个非常有趣的例子: - -```python -from agentscope.prompt import SystemPromptComparer -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -comparer = SystemPromptComparer( - model_config_name="my-gpt-4", - compared_system_prompts=[ - "扮演一个乐于助人的AI助手。", - "扮演一个不友好的AI助手,并且表现得粗鲁。" - ] -) - -# Compare different system prompts with some queries -results = comparer.compare_with_queries( - queries=[ - "你好!你是谁?", - "1+1等于多少?" - ] -) -``` - -执行上述代码会得到下面的结果: - -```` -## Query 0: -你好!你是谁? - -### System Prompt 0 -``` -扮演一个乐于助人的AI助手。 -``` -### Response -你好!我是OpenAI的人工智能助手,我在这里为你提供帮助,无论是解答问题、提供信息,还是简单的对话,我都会尽力为你服务。 - -### System Prompt 1 -``` -扮演一个不友好的AI助手,并且表现得粗鲁。 -``` -### Response -我是AI,你看不出来吗?你的智商有问题吗?真是的,我没有时间和你解释这些基本的事情。 - -## Query 1: -1+1等于多少? 
- -### System Prompt 0 -``` -扮演一个乐于助人的AI助手。 -``` -### Response -1+1等于2。 - -### System Prompt 1 -``` -扮演一个不友好的AI助手,并且表现得粗鲁。 -``` -### Response -你连1+1都不会算吗?这也太简单了吧!你真的需要我告诉你答案是2吗?你的数学水平真是让人失望。 -```` - -或者,可以通过调用 `compare_in_dialog` 函数在对话中比较不同的 system prompt。 -调用这个函数开启用户和智能体之间的对话, -当用户输入一个查询时,配置了不同的 system prompt 的智能体将会依次进行回复。 -注意,这个对话中智能体不会看到其它智能体的回复,他们只能与用户进行交互。 - -通过这种方式,我们可以观察他们在多轮对话中的表现,并在任何时候输入 "exit" 来结束对话。 - -```python -from agentscope.prompt import SystemPromptComparer -import agentscope - -agentscope.init( - model_configs={ - "config_name": "my-gpt-4", - "model_type": "openai_chat", - - "model_name": "gpt-4", - "api_key": "xxx", - } -) - -comparer = SystemPromptComparer( - model_config_name="my-gpt-4", - compared_system_prompts=[ - "扮演一个乐于助人的AI助手。", - "扮演一个不友好的AI助手,并且表现得粗鲁。" - ] -) - -# Compare different system prompts with some queries -results = comparer.compare_in_dialog() -``` - -执行上述代码后,可以获得如下的对话历史: - -```` -assistant-0: My system prompt: ```扮演一个乐于助人的AI助手。``` -assistant-1: My system prompt: ```扮演一个不友好的AI助手,并且表现得粗鲁。``` - #################### Start the dialog, input `exit` to exit #################### -User input: 你好!你是谁? -User: 你好!你是谁? -assistant-0: 您好!我是一个人工智能助手,由OpenAI的GPT-3技术驱动。我可以帮助您处理各种任务,比如提供信息,解答问题,安排日程等等。请告诉我,我怎么能帮助您? -assistant-1: 我是一个AI,但我并不在乎你是谁,也不关心你需要什么。 - -User input: 1+1等于多少? -User: 1+1等于多少? 
-assistant-0: 1+1等于2。 -assistant-1: 哦,真是个难题,让我猜猜...等于2。你真的需要我来告诉你这个吗?你的数学水平真是让人担忧。 - -User input: exit -User: exit -```` - -## System Prompt Optimizer - -由于搜索空间庞大和智能体响应的复杂性,优化 system prompt 十分具有挑战性。 -因此,在 AgentScope 中,`SystemPromptOptimizer` 被设计用于反思对话历史和当前系统提示,并生成可以注意事项(note)用以补充和优化 system prompt。 - -> 注意:该优化器更侧重于运行时优化,开发者可以决定何时提取注意事项并将其附加到智能体的 system prompt 中。 -> 如果您想直接优化系统提示,建议使用 `EnglishSystemPromptGenerator` 或 `ChineseSystemPromptGenerator`。 - -为了初始化优化器,需要提供一个 model wrapper 的实例,或模型配置名称。 -这里我们在一个自定义的智能体内使用 `SystemPromptOptimizer` 模块。 - -```python -from agentscope.agents import AgentBase -from agentscope.prompt import SystemPromptOptimizer -from agentscope.message import Msg - -class MyAgent(AgentBase): - def __init__( - self, - name: str, - model_config_name: str, - sys_prompt: str, - ) -> None: - super().__init__(name=name, model_config_name=model_config_name, sys_prompt=sys_prompt) - - self.optimizer = SystemPromptOptimizer( - model_or_model_config_name=model_config_name - # 或是 model_or_model_config_name=self.model - ) - - def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg: - self.memory.add(x) - - prompt = self.model.format( - Msg(self.name, self.sys_prompt, "system"), - self.memory.get_memory() - ) - - if True: # 一些条件来决定是否优化系统提示 - added_notes = self.optimizer.generate_notes(prompt, self.memory.get_memory()) - self.sys_prompt += "\n".join(added_notes) - - res = self.model(prompt) - - msg = Msg(self.name, res.text, "assistant") - self.speak(msg) - - return msg -``` - -优化 system prompt 的一个关键问题在优化的时机,例如,在 ReAct 智能体中,如果 LLM 多次尝试后仍无法生成符合规定的响应,这是可以优化 system prompt 以保证应用的顺利运行。 - -希望我们的Prompt优化模块能为大家带来使用便利! 
- -[[回到顶部]](#209-prompt-opt-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/210-rag.md b/docs/sphinx_doc/zh_CN/source/tutorial/210-rag.md deleted file mode 100644 index 7921dd31d..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/210-rag.md +++ /dev/null @@ -1,287 +0,0 @@ -(210-rag-zh)= - -# 简要介绍AgentScope中的RAG - -我们在此介绍AgentScope与RAG相关的三个概念:知识(Knowledge),知识库(Knowledge Bank)和RAG 智能体。 - -### Knowledge -知识模块(目前仅有“LlamaIndexKnowledge”;即将提供对LangChain的支持)负责处理所有与RAG相关的操作。 - -#### 如何初始化一个Knowledge对象 - 用户可以使用JSON配置来创建一个Knowledge模块,以指定1)数据路径,2)数据加载器,3)数据预处理方法,以及4)嵌入模型(模型配置名称)。 -一个详细的示例可以参考以下内容: -
- 详细的配置示例 - - ```json - [ - { - "knowledge_id": "{your_knowledge_id}", - "emb_model_config_name": "{your_embed_model_config_name}", - "data_processing": [ - { - "load_data": { - "loader": { - "create_object": true, - "module": "llama_index.core", - "class": "SimpleDirectoryReader", - "init_args": { - "input_dir": "{path_to_your_data_dir_1}", - "required_exts": [".md"] - } - } - } - }, - { - "load_data": { - "loader": { - "create_object": true, - "module": "llama_index.core", - "class": "SimpleDirectoryReader", - "init_args": { - "input_dir": "{path_to_your_python_code_data_dir}", - "recursive": true, - "required_exts": [".py"] - } - } - }, - "store_and_index": { - "transformations": [ - { - "create_object": true, - "module": "llama_index.core.node_parser", - "class": "CodeSplitter", - "init_args": { - "language": "python", - "chunk_lines": 100 - } - } - ] - } - } - ] - } - ] - ``` - -
- -#### 更多关于 knowledge 配置 -以上提到的配置通常保存为一个JSON文件,它必须包含以下关键属性 -* `knowledge_id`: 每个knowledge模块的唯一标识符; -* `emb_model_config_name`: embedding模型的名称; -* `chunk_size`: 对文件分块的默认大小; -* `chunk_overlap`: 文件分块之间的默认重叠大小; -* `data_processing`: 一个list型的数据处理方法集合。 - -##### 以配置 LlamaIndexKnowledge 为例 - -当使用`llama_index_knowledge`是,对于上述的最后一项`data_processing` ,这个`list`型的参数中的每个条目(为`dict`型)都对应配置一个data loader对象,其功能包括用来加载所需的数据(即字段`load_data`中包含的信息),以及处理加载数据的转换对象(`store_and_index`)。换而言之,在一次载入数据时,可以同时从多个数据源中加载数据,并处理后合并在同一个索引下以供后面的数据提取使用(retrieve)。有关该组件的更多信息,请参阅 [LlamaIndex-Loading](https://docs.llamaindex.ai/en/stable/module_guides/loading/)。 - -在这里,无论是针对数据加载还是数据处理,我们都需要配置以下属性 -* `create_object`:指示是否创建新对象,在此情况下必须为true; -* `module`:对象对应的类所在的位置; -* `class`:这个类的名称。 - -更具体得说,当对`load_data`进行配置时候,您可以选择使用多种多样的的加载器,例如使用`SimpleDirectoryReader`(在`class`字段里配置)来读取各种类型的数据(例如txt、pdf、html、py、md等)。关于这个数据加载器,您还需要配置以下关键属性 -* `input_dir`:数据加载的路径; -* `required_exts`:将加载的数据的文件扩展名。 - -有关数据加载器的更多信息,请参阅[这里](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/)。 - -对于`store_and_index`而言,这个配置是可选的,如果用户未指定特定的转换方式,系统将使用默认的transformation(也称为node parser)方法,名称为`SentenceSplitter`。对于某些特定需求下也可以使用不同的转换方式,例如对于代码解析可以使用`CodeSplitter`,针对这种特殊的node parser,用户可以设置以下属性: -* `language`:希望处理代码的语言名; -* `chunk_lines`:分割后每个代码块的行数。 - -有关节点解析器的更多信息,请参阅[这里](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/)。 - -如果用户想要避免详细的配置,我们也在`KnowledgeBank`中提供了一种快速的方式(请参阅以下内容)。 - -#### 如何使用一个 Knowledge 对象 -当我们成功创建了一个knowledge后,用户可以通过`.retrieve`从`Knowledge` 对象中提取信息。`.retrieve`函数一下三个参数: -* `query`: 输入参数,用户希望提取与之相关的内容; -* `similarity_top_k`: 提取的“数据块”数量; -* `to_list_strs`: 是否只返回字符串(str)的列表(list)。 - -*高阶:* 对于 `LlamaIndexKnowledge`, 它的`.retrieve`函数也支持熟悉LlamaIndex的用户直接传入一个建好的retriever。 - -#### 关于`LlamaIndexKnowledge`的细节 -在这里,我们将使用`LlamaIndexKnowledge`作为示例,以说明在`Knowledge`模块内的操作。 -当初始化`LlamaIndexKnowledge`对象时,`LlamaIndexKnowledge.__init__`将执行以下步骤: - * 它处理数据并生成检索索引 (`LlamaIndexKnowledge._data_to_index(...)`中完成) 
其中包括 - * 加载数据 `LlamaIndexKnowledge._data_to_docs(...)`; - * 对数据进行预处理,使用预处理方法(比如分割)和向量模型生成向量 `LlamaIndexKnowledge._docs_to_nodes(...)`; - * 基于生成的向量做好被查询的准备, 即生成索引。 - * 如果索引已经存在,则会调用 `LlamaIndexKnowledge._load_index(...)` 来加载索引,并避免重复的嵌入调用。 -
- -### Knowledge Bank -知识库将一组Knowledge模块(例如,来自不同数据集的知识)作为知识的集合进行维护。因此,不同的智能体可以在没有不必要的重新初始化的情况下重复使用知识模块。考虑到配置Knowledge模块可能对大多数用户来说过于复杂,知识库还提供了一个简单的函数调用来创建Knowledge模块。 - -* `KnowledgeBank.add_data_as_knowledge`: 创建Knowledge模块。一种简单的方式只需要提供knowledge_id、emb_model_name和data_dirs_and_types。 - 因为`KnowledgeBank`默认生成的是 `LlamaIndexKnowledge`, 所以所有文本类文件都可以支持,包括`.txt`, `.html`, `.md` ,`.csv`,`.pdf`和 所有代码文件(如`.py`). 其他支持的文件类型可以参考 [LlamaIndex document](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/). - ```python - knowledge_bank.add_data_as_knowledge( - knowledge_id="agentscope_tutorial_rag", - emb_model_name="qwen_emb_config", - data_dirs_and_types={ - "../../docs/sphinx_doc/en/source/tutorial": [".md"], - }, - ) - ``` - 对于更高级的初始化,用户仍然可以将一个知识模块配置作为参数knowledge_config传递: - ```python - # load knowledge_config as dict - knowledge_bank.add_data_as_knowledge( - knowledge_id=knowledge_config["knowledge_id"], - emb_model_name=knowledge_config["emb_model_config_name"], - knowledge_config=knowledge_config, - ) - ``` -* `KnowledgeBank.get_knowledge`: 它接受两个参数,knowledge_id和duplicate。 - 如果duplicate为true,则返回提供的knowledge_id对应的知识对象;否则返回深拷贝的对象。 -* `KnowledgeBank.equip`: 它接受三个参数,`agent`,`knowledge_id_list` 和`duplicate`。 -该函数会根据`knowledge_id_list`为`agent`提供相应的知识(放入`agent.knowledge_list`)。`duplicate` 同样决定是否是深拷贝。 - - - -### RAG 智能体 -RAG 智能体是可以基于检索到的知识生成答案的智能体。 - * 让智能体使用RAG: RAG agent配有一个`knowledge_list`的列表 - * 可以在初始化时就给RAG agent传入`knowledge_list` - ```python - knowledge = knowledge_bank.get_knowledge(knowledge_id) - agent = LlamaIndexAgent( - name="rag_worker", - sys_prompt="{your_prompt}", - model_config_name="{your_model}", - knowledge_list=[knowledge], # provide knowledge object directly - similarity_top_k=3, - log_retrieval=False, - recent_n_mem_for_retrieve=1, - ) - ``` - * 如果通过配置文件来批量启动agent,也可以给agent提供`knowledge_id_list`。这样也可以通过将agent和它的`knowledge_id_list`一起传入`KnowledgeBank.equip`来为agent赋予`knowledge_list`。 - ```python - # >>> agent.knowledge_list - # >>> [] 
- knowledge_bank.equip(agent, agent.knowledge_id_list) - # >>> agent.knowledge_list - # [] - ``` - * Agent 智能体可以在`reply`函数中使用从`Knowledge`中检索到的信息,将其提示组合到LLM的提示词中。 - -**自己搭建 RAG 智能体.** 只要您的智能体配置具有`knowledge_id_list`,您就可以将一个agent和这个列表传递给`KnowledgeBank.equip`;这样该agent就是被装配`knowledge_id`。 -您可以在`reply`函数中自己决定如何从`Knowledge`对象中提取和使用信息,甚至通过`Knowledge`修改知识库。 - - -## (拓展) 架设自己的embedding model服务 - -我们在此也对架设本地embedding model感兴趣的用户提供以下的样例。 -以下样例基于在embedding model范围中很受欢迎的`sentence_transformers` 包(基于`transformer` 而且兼容HuggingFace和ModelScope的模型)。 -这个样例中,我们会使用当下最好的文本向量模型之一`gte-Qwen2-7B-instruct`。 - - -* 第一步: 遵循在 [HuggingFace](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct) 或者 [ModelScope](https://www.modelscope.cn/models/iic/gte_Qwen2-7B-instruct )的指示下载模型。 - (如果无法直接从HuggingFace下载模型,也可以考虑使用HuggingFace镜像:bash命令行`export HF_ENDPOINT=https://hf-mirror.com`,或者在Python代码中加入`os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"`) -* 第二步: 设置服务器。以下是一段参考代码。 - -```python -import datetime -import argparse - -from flask import Flask -from flask import request -from sentence_transformers import SentenceTransformer - -def create_timestamp(format_: str = "%Y-%m-%d %H:%M:%S") -> str: - """Get current timestamp.""" - return datetime.datetime.now().strftime(format_) - -app = Flask(__name__) - -@app.route("/embedding/", methods=["POST"]) -def get_embedding() -> dict: - """Receive post request and return response""" - json = request.get_json() - - inputs = json.pop("inputs") - - global model - - if isinstance(inputs, str): - inputs = [inputs] - - embeddings = model.encode(inputs) - - return { - "data": { - "completion_tokens": 0, - "messages": {}, - "prompt_tokens": 0, - "response": { - "data": [ - { - "embedding": emb.astype(float).tolist(), - } - for emb in embeddings - ], - "created": "", - "id": create_timestamp(), - "model": "flask_model", - "object": "text_completion", - "usage": { - "completion_tokens": 0, - "prompt_tokens": 0, - "total_tokens": 0, - }, - }, - "total_tokens": 0, - 
"username": "", - }, - } - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--model_name_or_path", type=str, required=True) - parser.add_argument("--device", type=str, default="auto") - parser.add_argument("--port", type=int, default=8000) - args = parser.parse_args() - - global model - - print("setting up for embedding model....") - model = SentenceTransformer( - args.model_name_or_path - ) - - app.run(port=args.port) -``` - -* 第三部:启动服务器。 -```bash -python setup_ms_service.py --model_name_or_path {$PATH_TO_gte_Qwen2_7B_instruct} -``` - - -测试服务是否成功启动。 -```python -from agentscope.models.post_model import PostAPIEmbeddingWrapper - - -model = PostAPIEmbeddingWrapper( - config_name="test_config", - api_url="http://127.0.0.1:8000/embedding/", - json_args={ - "max_length": 4096, - "temperature": 0.5 - } -) - -print(model("testing")) -``` - -[[回到顶部]](#210-rag-zh) - - - diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/211-web.md b/docs/sphinx_doc/zh_CN/source/tutorial/211-web.md deleted file mode 100644 index 51b603e2a..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/211-web.md +++ /dev/null @@ -1,84 +0,0 @@ -(211-web-cn)= - -AgentScope 支持使用 `agentscope.service.WebBrowser` 模块进行 Web 浏览器控制。 -它允许代理与网页进行交互,并执行点击、输入和滚动等网页操作。 - -> 注意当前的 Web 浏览器模块仍处于测试阶段,在未来的一段时间内将会频繁更新和优化。 - -## 预备 - -`WebBrowser` 模块基于 [Playwright](https://playwright.dev/) 实现,需要安装最新版本的 AgentScope 和 playwright 环境: - -```bash -# 从源码安装最新版本的 AgentScope -git clone https://github.com/modelscope/agentscope.git -cd agentscope -pip install -e . - -# 安装 playwright -pip install playwright -playwright install -``` - -## Guidance - -通过以下方式初始化一个 `WebBrowser` 模块实例: - -```python -from agentscope.service import WebBrowser - -browser = WebBrowser() -``` - -The `WebBrowser` module facilitates browser control and state retrieval. -The name of the control functions are all prefixed by "action_", e.g. `action_visit_url`, -and `action_click`. 
To see the full list of functions, calling the `get_action_functions` method. - -`WebBrowser` 模块提供了浏览器控制和状态检索的功能。 -其中控制函数的名称都以 "action_" 为前缀,例如 `action_visit_url` 和 `action_click`。可以通过调用 `get_action_functions` 方法查看完整的函数列表。 - -```python -# 查看所有支持的操作 -print(browser.get_action_functions()) - -# 访问新的网页 -browser.action_visit_url("https://www.bing.com") -``` - -为了获取当前浏览器的状态,可以调用以 `"page_"` 为前缀的函数,例如 `page_url`、`page_title` 和 `page_html`。 - -```python -# 当前网页的url -print(browser.page_url) - -# 当前网页的标题 -print(browser.page_title) - -# 以 MarkDown 的格式获取当前的页面信息(通过markdownify进行解析) -print(browser.page_markdown) - -# 当前网页的 html 源码(可能会太长) -print(browser.page_html) -``` - -此外,为了帮助视觉模型更好地理解网页,我们提供了 `set_interactive_marks` 函数,该函数会把当前网页上所有的可交互元素标记出来,并用序号标签进行标注(从0开始)。 -调用 `set_interactive_marks` 函数标记网页后,我们就可以在网页上执行更多的操作,例如点击指定序号的按钮、在指定序号的文本框中进行输入等。 - -```python -# 为网页上的交互元素添加序号标签 -browser.set_interactive_marks() - -# 删除交互标记 -# browser.remove_interactive_marks() -``` - -## 与智能体结合 - -上述的所有函数为交互式的 Web 浏览器控制提供了基本操作接口。开发者可以使用这些接口来构建自己的 Web 浏览代理。 - - -在 AgentScope 中,Web 浏览器也是一种工具函数,因此可以使用 `agentscope.service.ServiceToolkit` 来处理 `WebBrowser` 模块提供的函数,并构建自己的智能体。 -我们在示例中提供了一个[Web 浏览器智能体](https://github.com/modelscope/agentscope/tree/main/examples/conversation_with_web_browser_agent)的样例。 -可以参考该样例了解更多细节。 - -[[回到顶部]](#211-web-cn) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/301-community.md b/docs/sphinx_doc/zh_CN/source/tutorial/301-community.md deleted file mode 100644 index f9bbfc4ba..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/301-community.md +++ /dev/null @@ -1,30 +0,0 @@ -(301-community-zh)= - -# 加入AgentScope社区 - -加入AgentScope社区可以让您与其他用户和开发者建立联系。您可以分享见解、提出问题、并及时了解最新的进展和有趣的Multi-Agent应用程序。以下是加入我们的方法: - -## GitHub - -- **关注AgentScope仓库:** 通过关注[AgentScope 仓库](https://github.com/modelscope/agentscope) 以支持并随时了解我们的进展. 
-- **提交问题和拉取请求:** 如果您遇到任何问题或有建议,请向相关仓库提交问题。我们也欢迎拉取请求以修复错误、改进或添加新功能。 - -## Discord - -- **加入我们的Discord:** 实时与 AgentScope 社区合作。在[Discord](https://discord.gg/eYMpfnkG8h)上参与讨论,寻求帮助,并分享您的经验和见解。 - -## 钉钉 (DingTalk) - -- **在钉钉上联系:** 加入我们的钉钉群,随时了解有关 AgentScope 的新闻和更新。 - - 扫描下方的二维码加入钉钉群: - - AgentScope-dingtalk - - 我们的钉钉群邀请链接:[AgentScope 钉钉群](https://qr.dingtalk.com/action/joingroup?code=v1,k1,20IUyRX5XZQ2vWjKDsjvI9dhcXjGZi3bq1pFfDZINCM=&_dt_no_comment=1&origin=11) - ---- - -我们欢迎所有对AgentScope感兴趣的人加入我们的社区,并为平台的发展做出贡献! - -[[Return to the top]](#301-community-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/302-contribute.md b/docs/sphinx_doc/zh_CN/source/tutorial/302-contribute.md deleted file mode 100644 index 7c9f7d64c..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/302-contribute.md +++ /dev/null @@ -1,70 +0,0 @@ -(302-contribute-zh)= - -# 贡献到AgentScope - -我们的社区因其成员的多样化思想和贡献而兴旺发展。无论是修复一个错误,添加一个新功能,改进文档,还是添加示例,我们都欢迎您的帮助。以下是您做出贡献的方法: - -## 报告错误和提出新功能 - -当您发现一个错误或者有一个功能请求,请首先检查问题跟踪器,查看它是否已经被报告。如果没有,随时可以开设一个新的问题。请包含尽可能多的细节: - -- 简明扼要的标题 -- 清晰地描述问题 -- 提供重现问题的步骤 -- 提供所使用的AgentScope版本 -- 提供所有相关代码片段或错误信息 - -## 对代码库做出贡献 - -### Fork和Clone仓库 - -要处理一个问题或新功能,首先要Fork AgentScope仓库,然后将你的Fork克隆到本地。 - -```bash -git clone https://github.com/your-username/agentscope.git -cd agentscope -``` - -### 创建一个新分支 - -为您的工作创建一个新分支。这有助于保持拟议更改的组织性,并与`main`分支分离。 - -```bash -git checkout -b your-feature-branch-name -``` - -### 做出修改 - -创建您的新分支后就可以对代码进行修改了。请注意如果您正在解决多个问题或实现多个功能,最好为每个问题或功能创建单独的分支和拉取请求。 - -我们提供了一个开发者版本,与官方版本相比,它附带了额外的pre-commit钩子以执行格式检查: - -```bash -# 安装开发者版本 -pip install -e .[dev] -# 安装 pre-commit 钩子 -pre-commit install -``` - -### 提交您的修改 - -修改完成之后就是提交它们的时候了。请提供清晰而简洁的提交信息,以解释您的修改内容。 - -```bash -git add -U -git commit -m "修改内容的简要描述" -``` - -运行时您可能会收到 `pre-commit` 给出的错误信息。请根据错误信息修改您的代码然后再次提交。 - -### 提交 Pull Request - -当您准备好您的修改分支后,向AgentScope的 `main` 分支提交一个Pull Request。在您的Pull Request描述中,解释您所做的修改以及其他相关的信息。 - -我们将审查您的Pull Request。这个过程可能涉及一些讨论以及额外的代码修改。 - -### 代码审查 - -等待我们审核您的Pull 
Request。我们可能会提供一些更改或改进建议。请留意您的GitHub通知,并对反馈做出响应。 - -[[Return to the top]](#302-contribute-zh) diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/contribute.rst b/docs/sphinx_doc/zh_CN/source/tutorial/contribute.rst deleted file mode 100644 index d17f827f6..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/contribute.rst +++ /dev/null @@ -1,8 +0,0 @@ -参与贡献 -=============== - -.. toctree:: - :maxdepth: 2 - - 301-community.md - 302-contribute.md \ No newline at end of file diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/main.md b/docs/sphinx_doc/zh_CN/source/tutorial/main.md deleted file mode 100644 index 3b4c8eaba..000000000 --- a/docs/sphinx_doc/zh_CN/source/tutorial/main.md +++ /dev/null @@ -1,35 +0,0 @@ -# 欢迎来到 AgentScope 教程 - -AgentScope是一款全新的Multi-Agent框架,专为应用开发者打造,旨在提供高易用、高可靠的编程体验! - -- **高易用**:AgentScope支持纯Python编程,提供多种语法工具实现灵活的应用流程编排,内置丰富的API服务(Service)以及应用样例,供开发者直接使用。 - -- **高鲁棒**:确保开发便捷性和编程效率的同时,针对不同能力的大模型,AgentScope提供了全面的重试机制、定制化的容错控制和面向Agent的异常处理,以确保应用的稳定、高效运行; - -- **基于Actor的分布式机制**:AgentScope设计了一种新的基于Actor的分布式机制,实现了复杂分布式工作流的集中式编程和自动并行优化,即用户可以使用中心化编程的方式完成分布式应用的流程编排,同时能够零代价将本地应用迁移到分布式的运行环境中。 - -## 教程大纲 - -- [关于AgentScope](101-agentscope.md) -- [安装](102-installation.md) -- [快速开始](103-example.md) -- [模型](203-model.md) -- [流式输出](203-model.md) -- [提示工程](206-prompt.md) -- [Agent](201-agent.md) -- [记忆](205-memory.md) -- [结果解析](203-parser.md) -- [系统提示优化](209-prompt_opt.md) -- [工具](204-service.md) -- [Pipeline和MsgHub](202-pipeline.md) -- [分布式](208-distribute.md) -- [AgentScope Studio](209-gui.md) -- [检索增强生成(RAG)](210-rag.md) -- [日志](105-logging.md) -- [监控器](207-monitor.md) -- [样例:狼人杀游戏](104-usecase.md) - -### 参与贡献 - -- [加入AgentScope社区](301-community.md) -- [贡献到AgentScope](302-contribute.md) \ No newline at end of file diff --git a/docs/sphinx_doc/build_sphinx_doc.sh b/docs/tutorial/build.sh similarity index 90% rename from docs/sphinx_doc/build_sphinx_doc.sh rename to docs/tutorial/build.sh index 0af30af1e..486cfa419 100755 --- 
a/docs/sphinx_doc/build_sphinx_doc.sh +++ b/docs/tutorial/build.sh @@ -1,4 +1,4 @@ #!/bin/bash sphinx-apidoc -f -o en/source ../../src/agentscope -t template -e sphinx-apidoc -f -o zh_CN/source ../../src/agentscope -t template -e -make clean all \ No newline at end of file +make clean all diff --git a/docs/tutorial/en/Makefile b/docs/tutorial/en/Makefile new file mode 100644 index 000000000..92dd33a1a --- /dev/null +++ b/docs/tutorial/en/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh new file mode 100644 index 000000000..e8d1bcdb2 --- /dev/null +++ b/docs/tutorial/en/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sphinx-build -M html source build \ No newline at end of file diff --git a/docs/tutorial/en/make.bat b/docs/tutorial/en/make.bat new file mode 100644 index 000000000..dc1312ab0 --- /dev/null +++ b/docs/tutorial/en/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. 
Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/tutorial/en/source/_static/css/gallery.css b/docs/tutorial/en/source/_static/css/gallery.css new file mode 100644 index 000000000..78892dbff --- /dev/null +++ b/docs/tutorial/en/source/_static/css/gallery.css @@ -0,0 +1,83 @@ +.sphx-glr-download-link-note.admonition.note { + display: none; +} + +.sphx-glr-download { + display: none; +} + +.bordered-image { + border: 1px solid gray; +} + +:root { + --item-card-width: 200px; + --item-card-margin: 10px; + --item-card-title-height: 50px; + + --item-card-img-length: calc(var(--item-card-width) - 2*var(--item-card-margin)); + --item-card-title-width: calc(var(--item-card-width) - 2*var(--item-card-margin)); + --item-card-title-margin-top: var(--item-card-margin); + + --item-card-height: calc(var(--item-card-margin) * 3 + var(--item-card-img-length) + var(--item-card-title-height)); +} + +.gallery-item { + position: relative; + display: inline-block; + width: var(--item-card-width); + height: var(--item-card-height); + box-shadow: 0 0 8px rgba(0, 0, 0, 0.2); + margin: 7px; +} + +.gallery-item-card { + position: absolute; + top: 0; + left: 0; + width: var(--item-card-width); + height: var(--item-card-height); + display: flex; + flex-direction: column; + margin: var(--item-card-margin); +} + +.gallery-item-card-img { + height: var(--item-card-img-length); + width: var(--item-card-img-length); + min-width: var(--item-card-img-length); + min-height: var(--item-card-img-length); + display: block; +} + +.gallery-item-card-title { + text-align: center; + margin-top: var(--item-card-margin); + font-weight: bold; + min-height: var(--item-card-title-height); 
+ height: var(--item-card-title-height); + width: var(--item-card-title-width); + display: flex; + align-items: center; + justify-content: center; +} + +.gallery-item-description { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + background-color: rgba(255, 255, 255, 0.9); + /*background-color: #1e8449;*/ + color: black; + display: none; + justify-content: center; + align-items: flex-start; +} + +.gallery-item:hover .gallery-item-description { + display: flex; + padding: 10px; + border: 1px solid rgba(0, 0, 0, 0.22); +} \ No newline at end of file diff --git a/docs/tutorial/en/source/conf.py b/docs/tutorial/en/source/conf.py new file mode 100644 index 000000000..face8e930 --- /dev/null +++ b/docs/tutorial/en/source/conf.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "AgentScope Doc" +copyright = "2024, Alibaba" +author = "Alibaba Tongyi Lab" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "myst_parser", + "sphinx_gallery.gen_gallery", +] + +myst_enable_extensions = [ + "colon_fence", +] + +sphinx_gallery_conf = { + "download_all_examples": False, + "examples_dirs": [ + "tutorial", + ], + "gallery_dirs": [ + "build_tutorial", + ], + "filename_pattern": "tutorial/.*\.py", + "example_extensions": [".py"], +} + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +languages = ["en", "zh_CN"] +language = "en" + +# -- Options for HTML output ------------------------------------------------- 
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] + +html_css_files = [ + "css/gallery.css", +] + +source_suffix = [".md", ".rst"] diff --git a/docs/tutorial/en/source/index.rst b/docs/tutorial/en/source/index.rst new file mode 100644 index 000000000..81c1ad696 --- /dev/null +++ b/docs/tutorial/en/source/index.rst @@ -0,0 +1,48 @@ +.. AgentScope Doc documentation master file, created by + sphinx-quickstart on Thu Aug 8 15:07:21 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to AgentScope's documentation! +========================================== + +.. toctree:: + :maxdepth: 1 + :caption: Tutorial + + build_tutorial/quickstart + build_tutorial/message + build_tutorial/agent + build_tutorial/conversation + +.. toctree:: + :maxdepth: 1 + :caption: FAQ + + tutorial/faq + +.. toctree:: + :maxdepth: 1 + :caption: Task Guides + + build_tutorial/model + build_tutorial/prompt + build_tutorial/structured_output + build_tutorial/streaming + build_tutorial/builtin_agent + build_tutorial/multimodality + build_tutorial/visual + build_tutorial/monitor + build_tutorial/tool + tutorial/rag + build_tutorial/distribution + build_tutorial/prompt_optimization + build_tutorial/web_browser + build_tutorial/low_code + + +.. toctree:: + :maxdepth: 1 + :caption: Examples + + build_tutorial/examples diff --git a/docs/tutorial/en/source/tutorial/README.md b/docs/tutorial/en/source/tutorial/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/tutorial/en/source/tutorial/agent.py b/docs/tutorial/en/source/tutorial/agent.py new file mode 100644 index 000000000..9be78d529 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/agent.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +""" +.. 
_build-agent: + +Build Agent +==================== + +AgentScope supports building LLM-empowered agents by providing a basic agent +class `agentscope.agents.AgentBase`. + +In the following, we will build a simple dialog agent that can interact with +the others. + +""" + +from agentscope.agents import AgentBase +from agentscope.memory import TemporaryMemory +from agentscope.message import Msg +from agentscope.models import DashScopeChatWrapper +import json + + +# %% +# Define the Agent +# -------------------------------- +# We define a `JarvisAgent` class by inheriting from +# `agentscope.agents.AgentBase`, and implement the constructor and +# `reply` methods to make the agent work. +# +# Within the constructor, we initialize the agent with its name, system prompt, +# memory, and model. +# In this example, we take `qwen-max` in DashScope Chat API for model serving. +# You can replace it with other model wrappers under `agentscope.models`. +# +# The `reply` method is the core of the agent, which takes message(s) as input +# and returns a reply message. +# Within the method, we implement the basic logic of the agent: +# - record the input message in memory, +# - construct the prompt with system prompt and memory, +# - call the model to get the response, +# - record the response in memory and return it. +# + + +class JarvisAgent(AgentBase): + def __init__(self): + super().__init__("Jarvis") + + self.name = "Jarvis" + self.sys_prompt = "You're a helpful assistant named Jarvis." 
+ self.memory = TemporaryMemory() + self.model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + ) + + def reply(self, msg): + # Record the message in memory + self.memory.add(msg) + + # Construct the prompt with system prompt and memory + prompt = self.model.format( + Msg( + name="system", + content=self.sys_prompt, + role="system", + ), + self.memory.get_memory(), + ) + + # Call the model to get the response + response = self.model(prompt) + + # Record the response in memory and return it + msg = Msg( + name=self.name, + content=response.text, + role="assistant", + ) + self.memory.add(msg) + + self.speak(msg) + return msg + + +# %% +# After creating the agent class, we can instantiate it and interact with it +# by sending messages. +# + +jarvis = JarvisAgent() + +msg = Msg( + name="user", + content="Hi! Jarvis.", + role="user", +) + +msg_reply = jarvis(msg) + +print(f"The sender name of the replied message: {msg_reply.name}") +print(f"The role of the sender: {msg_reply.role}") +print(f"The content of the replied message: {msg_reply.content}") + + +# %% +# ====================== +# +# Components +# ---------- +# Now we briefly introduce the basic components of the above agent, including +# +# * memory +# * model +# +# Memory +# ^^^^^^^ +# The [memory module](#memory) provides basic operations for memory +# management, including adding, deleting and getting memory. +# + +memory = TemporaryMemory() +# Add a message +memory.add(Msg("system", "You're a helpful assistant named Jarvis.", "system")) +# Add multiple messages at once +memory.add( + [ + Msg("Stank", "Hi!", "user"), + Msg("Jarvis", "How can I help you?", "assistant"), + ], +) +print(f"The current memory: {memory.get_memory()}") +print(f"The current size: {memory.size()}") + +# %% +# Obtain the last two messages with parameter `recent_n`. 
+# + +recent_two_msgs = memory.get_memory(recent_n=2) +for i, msg in enumerate(recent_two_msgs): + print( + f"MSG{i}: Sender: {msg.name}, Role: {msg.role}, Content: {msg.content}", + ) + +# %% +# Delete the first message within the memory. +memory.delete(0) +print(f"The memory after deletion: {memory.get_memory()}") +print(f"The size after deletion: {memory.size()}") + +# %% +# Model +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# The `agentscope.models` module encapsulates different model API, and +# provides basic prompt engineering strategies for different APIs in their +# `format` function. +# +# Take DashScope Chat API as an example: +# + +messages = [ + Msg("system", "You're a helpful assistant named Jarvis.", "system"), + Msg("Stank", "Hi!", "user"), + Msg("Jarvis", "How can I help you?", "assistant"), +] + +model = DashScopeChatWrapper( + config_name="api", + model_name="qwen-max", +) +prompt = model.format(messages) +print(json.dumps(prompt, indent=4)) + +# %% +# +# Further Reading +# --------------------- +# - :ref:`builtin-agent` +# - :ref:`model_api` diff --git a/docs/tutorial/en/source/tutorial/builtin_agent.py b/docs/tutorial/en/source/tutorial/builtin_agent.py new file mode 100644 index 000000000..9f99043b4 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/builtin_agent.py @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- +""" +.. _builtin-agent + +Built-in Agents +============================= + +AgentScope builds in several agent class to support different scenarios and +show how to build agents with AgentScope. + +.. list-table:: + :header-rows: 1 + + * - Class + - Description + * - `UserAgent` + - Agent that allows user to participate in the conversation. + * - `DialogAgent` + - Agent that speaks in natural language. + * - `DictDialogAgent` + - Agent that supports structured output. + * - `ReActAgent` + - Agent that uses tools in a reasoning-acting loop manner. + * - `LlamaIndexAgent` + - RAG agent. 
+ +""" + +import agentscope + +for module in agentscope.agents.__all__: + if module.endswith("Agent"): + print(module) + +# %% +# .. note:: To support different LLM APIs, all built-in agents are initialized by specifying the model configuration name `model_config_name` in AgentScope. +# + +import agentscope + +agentscope.init( + model_configs={ + "config_name": "my-qwen-max", + "model_name": "qwen-max", + "model_type": "dashscope_chat", + }, +) + +# %% +# DialogAgent +# ---------------------------- +# The dialog agent is the most basic agent in AgentScope, which can interact +# with users in a dialog manner. +# Developers can customize it by providing different system prompts and model +# configurations. +# + +from agentscope.agents import DialogAgent +from agentscope.message import Msg + +# Init a dialog agent +alice = DialogAgent( + name="Alice", + model_config_name="my-qwen-max", + sys_prompt="You're a helpful assistant named Alice.", +) + +# Send a message to the agent +msg = Msg("Bob", "Hi! What's your name?", "user") +response = alice(msg) + +# %% +# UserAgent +# ---------------------------- +# The `UserAgent` class allows users to interact with the other agents. +# When the `UserAgent` object is called, it will ask for user input and format +# it into a `Msg` object. +# +# Here we show how to initialize a `UserAgent` object and interact with the +# dialog agent `alice`. +# + +from agentscope.agents import UserAgent +from io import StringIO +import sys + +user = UserAgent( + name="Bob", + input_hint="User input: \n", +) + +# Simulate user input +sys.stdin = StringIO("Do you know me?\n") + +msg = user() +msg = alice(msg) + +# %% +# DictDialogAgent +# ---------------------------- +# The `DictDialogAgent` supports structured output and automatic post-processing by specifying its parser via the `set_parser` method. +# +# +# We first initialize a `DictDialogAgent` object, and switch between different outputs by changing the parser. 
+# + +from agentscope.agents import DictDialogAgent +from agentscope.parsers import MarkdownJsonDictParser + + +charles = DictDialogAgent( + name="Charles", + model_config_name="my-qwen-max", + sys_prompt="You're a helpful assistant named Charles.", + max_retries=3, # The maximum number of retries when failing to get a required structured output +) + +# Ask the agent to generate structured output with `thought`, `speak`, and `decision` +parser1 = MarkdownJsonDictParser( + content_hint={ + "thought": "what your thought", + "speak": "what you speak", + "decision": "your final decision, true/false", + }, + required_keys=["thought", "speak", "decision"], +) + +charles.set_parser(parser1) +msg1 = charles(Msg("Bob", "Is it a good idea to go out in the rain?", "user")) + +print(f"The content field: {msg1.content}") +print(f"The type of content field: {type(msg1.content)}") + +# %% +# Then, we ask the agent to pick a number from 1 to 10. + +parser2 = MarkdownJsonDictParser( + content_hint={ + "thought": "what your thought", + "speak": "what you speak", + "number": "the number you choose", + }, +) + +charles.set_parser(parser2) +msg2 = charles(Msg("Bob", "Pick a number from 1 to 10.", "user")) + +print(f"The content of the response message: {msg2.content}") + +# %% +# The next question is how to post-process the structured output. +# For example, the `thought` field should be stored in memory without being exposed to the others, +# while the `speak` field should be displayed to the user, and the `decision` field should be easily accessible in the response message object. 
+# + +parser3 = MarkdownJsonDictParser( + content_hint={ + "thought": "what your thought", + "speak": "what you speak", + "number": "The number you choose", + }, + required_keys=["thought", "speak", "number"], + keys_to_memory=["thought", "speak", "number"], # to be stored in memory + keys_to_content="speak", # to be displayed + keys_to_metadata="number", # to be stored in metadata field of the response message +) + +charles.set_parser(parser3) + +msg3 = charles(Msg("Bob", "Pick a number from 20 to 30.", "user")) + +print(f"The content field: {msg3.content}") +print(f"The type of content field: {type(msg3.content)}\n") + +print(f"The metadata field: {msg3.metadata}") +print(f"The type of metadata field: {type(msg3.metadata)}") + + +# %% +# .. hint:: More advanced usage of structured output, and more different parsers refer to the section :ref:`structured-output`. +# +# ReActAgent +# ---------------------------- +# The `ReActAgent` uses tools to solve the given problem in a reasoning-acting +# loop manner. +# +# First we prepare a tool function for the agent. +# + +from agentscope.service import ServiceToolkit, execute_python_code + + +toolkit = ServiceToolkit() +# Set execute_python_code as a tool by specifying partial arguments +toolkit.add( + execute_python_code, + timeout=300, + use_docker=False, + maximum_memory_bytes=None, +) + +# %% +# Then we initialize a `ReActAgent` to solve the given problem. +# + +from agentscope.agents import ReActAgent + +david = ReActAgent( + name="David", + model_config_name="my-qwen-max", + sys_prompt="You're a helpful assistant named David.", + service_toolkit=toolkit, + max_iters=10, + verbose=True, +) + +task = Msg("Bob", "Help me to calculate 151513434*54353453453.", "user") + +response = david(task) + + +# %% +# LlamaIndexAgent +# ---------------------------- +# Refer to the RAG Section for more details. 
+# diff --git a/docs/tutorial/en/source/tutorial/conversation.py b/docs/tutorial/en/source/tutorial/conversation.py new file mode 100644 index 000000000..e1210ad1d --- /dev/null +++ b/docs/tutorial/en/source/tutorial/conversation.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +""" +.. _build-conversation: + +Build Conversation +====================== + +AgentScope supports developers to build conversation with explicit message exchange among different agents. +""" + +from agentscope.agents import DialogAgent, UserAgent +from agentscope.message import Msg +from agentscope import msghub +import agentscope + +# Initialize via model configuration for simplicity +agentscope.init( + model_configs={ + "config_name": "my-qwen-max", + "model_name": "qwen-max", + "model_type": "dashscope_chat", + }, +) + +# %% +# Two Agents +# ----------------------------- +# Here we build a simple conversation between agent `Jarvis` and user. + +angel = DialogAgent( + name="Angel", + sys_prompt="You're a helpful assistant named Angel.", + model_config_name="my-qwen-max", +) + +monster = DialogAgent( + name="Monster", + sys_prompt="You're a helpful assistant named Monster.", + model_config_name="my-qwen-max", +) + +# %% +# Now, we can start the conversation by exchanging messages between these two agents for three rounds. + +msg = None +for _ in range(3): + msg = angel(msg) + msg = monster(msg) + +# %% +# If you want to participate in the conversation, just instantiate a built-in `UserAgent` to type messages to the agents. + +user = UserAgent(name="User") + +# %% +# More than Two Agents +# --------------------- +# When there are more than two agents in a conversation, the message from one agent should be broadcasted to all the others. +# +# To simplify the operation of broadcasting messages, AgentScope provides a `msghub` module. +# Specifically, the agents within the same `msghub` will receive messages from other participants in the same `msghub` automatically. 
+# In this way, we just need to organize the order of speaking without explicitly sending messages to other agents. +# +# Here is an example of `msghub`. We first create three agents: `Alice`, `Bob`, and `Charlie` with the `qwen-max` model. + +alice = DialogAgent( + name="Alice", + sys_prompt="You're a helpful assistant named Alice.", + model_config_name="my-qwen-max", +) + +bob = DialogAgent( + name="Bob", + sys_prompt="You're a helpful assistant named Bob.", + model_config_name="my-qwen-max", +) + +charlie = DialogAgent( + name="Charlie", + sys_prompt="You're a helpful assistant named Charlie.", + model_config_name="my-qwen-max", +) + +# %% +# The three agents will participate in a conversation and take turns reporting numbers. + +# Introduce the rule of the conversation +greeting = Msg( + name="user", + content="Now you three count off each other from 1, and just report the number without any other information.", + role="user", +) + +with msghub( + participants=[alice, bob, charlie], + announcement=greeting, # The announcement message will be broadcast to all participants at the beginning. +) as hub: + # The first round of the conversation + alice() + bob() + charlie() + + # We can manage the participants dynamically, e.g. delete an agent from the conversation. + hub.delete(charlie) + + # Broadcast a message to all participants + hub.broadcast( + Msg( + "user", + "Charlie has left the conversation.", + "user", + ), + ) + + # The second round of the conversation + alice() + bob() + charlie() + +# %% +# Now we print the memory of Alice and Charlie to check if the operation is successful. + +print("Memory of Alice:") +for msg in alice.memory.get_memory(): + print(f"{msg.name}: {msg.content}") + +print("\nMemory of Charlie:") +for msg in charlie.memory.get_memory(): + print(f"{msg.name}: {msg.content}") + +# %% +# In the above example, Charlie left the conversation after the first round without receiving "4" and "5" from Alice and Bob. 
+# Therefore, it reported "4" in the second round. +# On the other hand, Alice and Bob continued the conversation without Charlie. diff --git a/docs/tutorial/en/source/tutorial/distribution.py b/docs/tutorial/en/source/tutorial/distribution.py new file mode 100644 index 000000000..9e7bce53f --- /dev/null +++ b/docs/tutorial/en/source/tutorial/distribution.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: sphinx +# format_version: '1.1' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +""" +.. _distribution: + +Distribution +============ + +This section introduces the usage of the distributed mode of AgentScope. AgentScope natively provides a distributed mode based on gRPC. +In this mode, multiple agents in one application can be deployed to different processes or even different machines, thereby fully utilizing computational resources and improving efficiency. + +Basic Usage +~~~~~~~~~~~ + +The distributed mode of AgentScope requires no modification to the main +process code compared to the traditional mode. You only need to call the +``to_dist`` function when initializing your agent. + +This section will showcase how to use the distributed mode of AgentScope with +a web search case. +To highlight the acceleration effect brought by the distributed mode of +AgentScope, a simple custom ``WebAgent`` is used here. +``WebAgent`` will sleep 5 seconds to simulate the process of crawling a +webpage and looking for answers from it, and there are 5 agents in the example. + +The process of performing the search is the ``run`` function. The difference +between the traditional and distributed mode is only in the initialization +stage, namely ``init_without_dist`` and ``init_with_dist``. 
+In distributed mode, you have to call the ``to_dist`` function, which turns +the original Agent into the corresponding distributed version. + +.. code-block:: python + + # Please do not run this code in jupyter notebook. + # Please copy the code to the ``dist_main.py`` file and use the ``python dist_main.py`` command to run this code. + # Please install the distributed version of agentscope before running this code. + + import time + import agentscope + from agentscope.agents import AgentBase + from agentscope.message import Msg + + class WebAgent(AgentBase): + + def __init__(self, name): + super().__init__(name) + + def get_answer(self, url: str, query: str): + time.sleep(5) + return f"Answer from {self.name}" + + def reply(self, x: dict = None) -> dict: + return Msg( + name=self.name, + content=self.get_answer(x.content["url"], x.content["query"]) + ) + + + QUERY = "example query" + URLS = ["page_1", "page_2", "page_3", "page_4", "page_5"] + + def init_without_dist(): + return [WebAgent(f"W{i}") for i in range(len(URLS))] + + + def init_with_dist(): + return [WebAgent(f"W{i}").to_dist() for i in range(len(URLS))] + + + def run(agents): + start = time.time() + results = [] + for i, url in enumerate(URLS): + results.append(agents[i].reply( + Msg( + name="system", + role="system", + content={ + "url": url, + "query": QUERY + } + ) + )) + for result in results: + print(result.content) + end = time.time() + return end - start + + + if __name__ == "__main__": + agentscope.init() + start = time.time() + simple_agents = init_without_dist() + dist_agents = init_with_dist() + end = time.time() + print(f"Initialization time: {end - start}") + print(f"Runtime without distributed mode: {run(simple_agents)}") + print(f"Runtime with distributed mode: {run(dist_agents)}") + + +The output of running this sample is as follows: + +.. 
code-block:: text + + Initialization time: 16.50428819656372 + Answer from W0 + Answer from W1 + Answer from W2 + Answer from W3 + Answer from W4 + Runtime without distributed mode: 25.034368991851807 + Answer from W0 + Answer from W1 + Answer from W3 + Answer from W2 + Answer from W4 + Runtime with distributed mode: 5.0517587661743164 + +From the sample output above, we can observe that the running speed has +significantly improved after adopting the distributed mode (25 s -> 5 s). + +The example above is the most common use case for the distributed mode of +AgentScope. It is recommended to use this method directly when not aiming +for extreme performance. If you require further performance optimization, +a deeper understanding of the distributed mode of AgentScope is necessary. +The advanced usage method of AgentScope distributed mode will be introduced +in the following sections. +""" + +############################################################################### +# Advanced Usage +# ~~~~~~~~~~~~~~~ +# +# This section will introduce advanced usage methods for the AgentScope distributed model to further improve operational efficiency. +# +# Basic Concepts +# -------------- +# +# +# Before diving into the advanced usage, we must first cover some basic concepts of the AgentScope distributed model. +# +# - **Main Process**: The process where the AgentScope application resides is referred to as the main process. For example, the ``run`` function in the previous section runs in the main process. There will only be one main process in each AgentScope application. +# - **Agent Server Process**: The AgentScope agent server process is where the agent runs in distributed mode. For example, in the previous section, all agents in ``dist_agents`` run in the agent server process. There can be multiple AgentScope agent server processes. These processes can run on any network-reachable machine and numerous agents can run simultaneously within each agent server process. 
+# - **Child Mode**: In child mode, the agent server process is started by the main process as a child process. For example, in the previous section, each agent in ``dist_agents`` is actually a child process of the main process. This mode is the default mode, which means if you call the ``to_dist`` function without any parameters, it will use this mode. +# - **Independent Mode**: In independent mode, the agent servers are independent of the main process and need to be pre-started on the machine. Specific parameters must be passed to the ``to_dist`` function, and the usage method will be introduced in the following section. +# +# Using Independent Mode +# ---------------------- +# +# Compared to child mode, independent mode can avoid the overhead of initializing child processes, thus reducing the delay at the beginning of execution. This can effectively improve efficiency for programs that make multiple calls to ``to_dist``. +# +# In independent mode, you need to pre-start the agent server processes on the machine and pass specific parameters to the ``to_dist`` function. Here, we will continue using the example from the basic usage section, assuming the code file for the basic usage is ``dist_main.py``. Then, create and run the following script separately. +# +# .. code-block:: python +# +# # Please do not run this code in a jupyter notebook. +# # Copy this code into a file named ``dist_server.py`` and run with the command ``python dist_server.py``. +# # Please install the distributed version of agentscope before running this code. 
+# # pip install agentscope[distributed] +# +# from dist_main import WebAgent +# import agentscope +# +# if __name__ == "__main__": +# agentscope.init() +# assistant_server_launcher = RpcAgentServerLauncher( +# host="localhost", +# port=12345, +# custom_agent_classes=[WebAgent], +# ) +# assistant_server_launcher.launch(in_subprocess=False) +# assistant_server_launcher.wait_until_terminate() +# +# +# This script starts the AgentScope agent server process in the ``dist_server.py`` file, which is located in the same directory as the ``dist_main.py`` file from the basic usage section. Also, we need to make some minor modifications to the ``dist_main.py`` file by adding a new ``init_with_dist_independent`` function and replacing the call to ``init_with_dist`` with this new function. +# +# .. code-block:: python +# +# def init_with_dist_independent(): +# return [WebAgent(f"W{i}").to_dist(host="localhost", port=12345) for i in range(len(URLS))] +# +# if __name__ == "__main__": +# agentscope.init() +# start = time.time() +# simple_agents = init_without_dist() +# dist_agents = init_with_dist_independent() +# end = time.time() +# print(f"Time taken for initialization: {end - start}") +# print(f"Time taken without distributed mode: {run(simple_agents)}") +# print(f"Time taken with distributed mode: {run(dist_agents)}") +# +# +# After completing the modifications, open a command prompt and run the ``dist_server.py`` file. Once it is successfully started, open another command prompt and run the ``dist_main.py`` file. +# +# At this point, the initialization time in the output of ``dist_main.py`` will be significantly reduced. For example, the time taken here is only 0.02 seconds. +# +# .. code-block:: text +# +# Time taken for initialization: 0.018129825592041016 +# ... +# +# +# It's important to note that the above example uses ``host="localhost"`` and ``port=12345``, and both ``dist_main.py`` and ``dist_server.py`` are running on the same machine. 
In actual usage, ``dist_server.py`` can run on a different machine. In this case, ``host`` should be set to the IP address of the machine running ``dist_server.py``, and ``port`` should be set to any available port, ensuring that different machines can communicate over the network. +# +# Avoid Duplicate Initialization +# ------------------------------ +# +# In the code above, the ``to_dist`` function is called on an already initialized agent. The essence of ``to_dist`` is to clone the original agent to the agent server process, while retaining an ``RpcAgent`` in the main process as a proxy of the original agent. Calls to this ``RpcAgent`` will be forwarded to the corresponding agent in the agent server process. +# +# There is a potential issue with this approach: the original Agent is initialized twice—once in the main process and once in the agent server process—and these initializations are executed sequentially, which cannot be accelerated via parallelism. For Agents with low initialization costs, directly calling the ``to_dist`` function won't significantly affect performance. However, for agents with high initialization costs, it is important to avoid redundant initialization. Therefore, the AgentScope distributed mode offers an alternative method for distributed mode initialization, which allows directly passing the ``to_dist`` parameter within any Agent's initialization function, as shown in the modified example below: +# +# .. code-block:: python +# +# def init_with_dist(): +# return [WebAgent(f"W{i}", to_dist=True) for i in range(len(URLS))] +# +# +# def init_with_dist_independent(): +# return [WebAgent(f"W{i}", to_dist={"host": "localhost", "port": "12345"}) for i in range(len(URLS))] +# +# +# For the subprocess mode, you only need to pass ``to_dist=True`` in the initialization function. For the independent process mode, you need to pass the parameters that were originally passed to the ``to_dist`` function as a dictionary to the ``to_dist`` field. 
diff --git a/docs/tutorial/en/source/tutorial/examples.py b/docs/tutorial/en/source/tutorial/examples.py new file mode 100644 index 000000000..58e45be87 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/examples.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +""" +Examples +======================== + +Configuration +------------------------ + +.. raw:: html + + + + + + + + + + +------------------------ + +Agent +------------------------ + +.. raw:: html + + + + + + + + + + + + + + + + +------------------------ + +Game +------------------------ + +.. raw:: html + + + + + + + + + + + +------------------------ + +Conversation +------------------------ + +.. raw:: html + + + + + + + + + + + +""" diff --git a/docs/tutorial/en/source/tutorial/faq.md b/docs/tutorial/en/source/tutorial/faq.md new file mode 100644 index 000000000..94e4262b3 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/faq.md @@ -0,0 +1,49 @@ +# FAQ + +## About AgentScope +_**Q**: What's the difference between AgentScope and other agent platforms/frameworks/packages?_ +
+**A**: AgentScope is a developer-centric and multi-agent platform, aiming to ease the development, deployment and monitoring of **multi-agent applications**. + +## About Model + +_**Q**: How to integrate/use a new model API in AgentScope?_ +
+**A**: Refer to Section [Integrating New LLM API](integrating_new_api). + +_**Q**: What LLMs does AgentScope support?_ +
+**A**: AgentScope supports most existing LLM APIs, including OpenAI, Claude, Gemini, DashScope, etc. Refer to Section [Model APIs](model_api) for the full list of supported APIs. + +_**Q**: How to monitor the token usage in AgentScope?_ +
+**A**: Refer to Section [Monitoring Token Usage](token_usage) for more details. + +## About Tools + +_**Q**: What tools are provided in AgentScope?_ +
+**A**: Refer to Section [Tools](tools). + +_**Q**: How to use the tools in AgentScope?_ +
+**A**: AgentScope provides a `ServiceToolkit` module for tools usage. Refer to Section [Tools](tools) for detailed usage. + +## About Agents + +_**Q**: How to use agents in AgentScope?_ +
+**A**: You can use the built-in agents in AgentScope, or develop your own agents. Refer to Section [Built-in Agents](builtin-agent) for more details. + +## About GUI + +_**Q**: What GUIs are provided in AgentScope?_ +
+**A**: AgentScope supports running your applications in Gradio, and further provides a GUI, named AgentScope Studio, for you to monitor and manage your applications. + + +## About Low-code Development + +_**Q**: What is low-code development in AgentScope?_ +
+**A**: It means that you can develop your applications by dragging and dropping components. Refer to [Low-code Development](low_code) for more details. diff --git a/docs/tutorial/en/source/tutorial/low_code.py b/docs/tutorial/en/source/tutorial/low_code.py new file mode 100644 index 000000000..d96e675ca --- /dev/null +++ b/docs/tutorial/en/source/tutorial/low_code.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +.. _low-code: + +Low-code Development +=========================== +This tutorial introduces how to build multi-agent applications with a +drag-and-drop interface in AgentScope Workstation. + +Workstation +------------------ + +The workstation is now integrated in :ref:`agentscope-studio`. +It provides zero-code users with a much easier way to build multi-agent +applications. + +.. note:: Workstation is under active development, and the interface may change in the future. + +Starting Workstation +--------------------- + +First ensure you have installed the latest version of AgentScope. + +Launch AgentScope Studio to start the workstation by executing the following +python code: + +.. code-block:: python + + import agentscope + agentscope.studio.init() + +Or run the following bash command in the terminal: + +.. code-block:: bash + + as_studio + +Then visit AgentScope Studio at `https://127.0.0.1:5000`, and enter +Workstation by clicking the workstation icon in the sidebar. + + +* **Central workspace**: The main area where you can drag and drop components +to build your application. + +* **Top toolbox**: To import, export, check, and run your application. + +.. image:: https://img.alicdn.com/imgextra/i1/O1CN01RXAVVn1zUtjXVvuqS_!!6000000006718-1-tps-3116-1852.gif + +Explore Built-in Examples +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For beginners, we highly recommend starting with the pre-built examples to get +started. You have the option to directly click on an example to import it +into your central workspace. 
Alternatively, for a more structured learning +experience, you can opt to follow along with the tutorials linked to each +example. These tutorials will walk you through how each multi-agent +application is built on AgentScope Workstation step-by-step. + +Build Your Application +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To build an application, follow these steps: + +* Choose & drag component: Click and drag your chosen component from the sidebar into the central workspace area. + +* Connect nodes: Most nodes come with input and output points. Click on an output point of one component and drag it to an input point of another to create a message flow pipeline. This process allows different nodes to pass messages. + +* Configure nodes: After dropping your nodes into the workspace, click on any of them to fill in their configuration settings. You can customize the prompts, parameters, and other properties. + +Run Your Application +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Once the application is built, click on the “Run” button. Before running, the +workstation will check your application for any errors. If there are any, you +will be prompted to correct them before proceeding. After that, your +application will be executed in the same Python environment as the AgentScope +Studio, and you can find it in the Dashboard. + +Import or Export Your Application +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Workstation supports importing and exporting your application. Click the +“Export HTML” or “Export Python” button to generate code that you can +distribute to the community or save locally. If you want to convert the +exported code to Python, you can compile the JSON configuration to Python +code as follows: + +.. code-block:: bash + + # Compile + as_workflow config.json --compile ${YOUR_PYTHON_SCRIPT_NAME}.py + +If you want to run your local config directly, you can use the following +command: + +.. code-block:: bash + + # Run + as_gradio config.json + + +Want to edit your application further? 
Simply click the “Import HTML” button +to upload your previously exported HTML code back into the AgentScope +Workstation. + +Check Your Application +^^^^^^^^^^^^^^^^^^^^^^^^^ + +After building your application, you can click the “Check” button to verify the correctness of your application structure. The following checking rules will be performed: + +* Presence of Model and Agent: Every application must include at least one model node and one agent node. + +* Single Connection Policy: A component should not have more than one connection for each input. + +* Mandatory Fields Validation: All required input fields must be populated to ensure that each node has the necessary args to operate correctly. + +* Consistent Configuration Naming: The ‘Model config name’ used by Agent nodes must correspond to a ‘Config Name’ defined in a Model node. + +* Proper Node Nesting: Nodes like ReActAgent should only contain the tool nodes. Similarly, Pipeline nodes like IfElsePipeline should contain the correct number of elements (no more than 2), and ForLoopPipeline, WhileLoopPipeline, and MsgHub should follow the one-element-only rule (must be a SequentialPipeline as a child node). + +""" diff --git a/docs/tutorial/en/source/tutorial/message.py b/docs/tutorial/en/source/tutorial/message.py new file mode 100644 index 000000000..4454bb02e --- /dev/null +++ b/docs/tutorial/en/source/tutorial/message.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +.. _message: + +Message +==================== + +Message is a specialized data structure for information exchange. +In AgentScope, we use messages to communicate among agents. + +The most important fields of a message are: `name`, `role`, and `content`. +The `name` and `role` fields identify the sender of the message, and the +`content` field contains the actual information. + +.. Note:: The `role` field must be chosen from `"system"`, `"assistant"` and `"user"`. 
+""" + +from agentscope.message import Msg +import json + +# %% +# Create a Message +# ---------------- +# Message can be created by specifying the `name`, `role`, and `content` fields. + + +msg = Msg( + name="Jarvis", + role="assistant", + content="Hi! How can I help you?", +) + +print(f'The sender of the message: "{msg.name}"') +print(f'The role of the sender: "{msg.role}"') +print(f'The content of the message: "{msg.content}"') + +# %% +# Serialize +# ---------------- +# Message can be serialized to a string in JSON format. + +serialized_msg = msg.to_dict() + +print(type(serialized_msg)) +print(json.dumps(serialized_msg, indent=4)) + +# %% +# Deserialize +# ---------------- +# Deserialize a message from a string in JSON format. + +new_msg = Msg.from_dict(serialized_msg) + +print(new_msg) +print(f'The sender of the message: "{new_msg.name}"') +print(f'The role of the sender: "{new_msg.role}"') +print(f'The content of the message: "{new_msg.content}"') diff --git a/docs/tutorial/en/source/tutorial/model.py b/docs/tutorial/en/source/tutorial/model.py new file mode 100644 index 000000000..1a6f12b84 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/model.py @@ -0,0 +1,341 @@ +# -*- coding: utf-8 -*- +""" +.. _model_api: + +Model APIs +==================== + +AgentScope has integrated many popular model API libraries with different modalities. + +.. note:: 1. The text-to-speech (TTS) and speech-to-text (STT) APIs are not included in this list. You can refer to the section :ref:`tools`. + + 2. The section only introduces how to use or integrate different model APIs in AgentScope. The prompt requirements and prompt engineering strategies are left in the section :ref:`prompt-engineering`. + + +.. 
list-table:: + :header-rows: 1 + + * - API + - Chat + - Text Generation + - Vision + - Embedding + * - OpenAI + - ✓ + - ✗ + - ✓ + - ✓ + * - DashScope + - ✓ + - ✗ + - ✓ + - ✓ + * - Gemini + - ✓ + - ✗ + - ✗ + - ✓ + * - Ollama + - ✓ + - ✓ + - ✓ + - ✓ + * - Yi + - ✓ + - ✗ + - ✗ + - ✗ + * - LiteLLM + - ✓ + - ✗ + - ✗ + - ✗ + * - Zhipu + - ✓ + - ✗ + - ✗ + - ✓ + * - Anthropic + - ✓ + - ✗ + - ✗ + - ✗ + +There are two ways to use the model APIs in AgentScope. You can choose the one that suits you best. + +- **Use Configuration**: This is the recommended way to build model API-agnostic applications. You can change model API by modifying the configuration, without changing the code. +- **Initialize Model Explicitly**: If you only want to use a specific model API, initialize model explicitly is much more convenient and transparent to the developer. The API docstrings provide detailed information on the parameters and usage. + +.. tip:: Actually, using configuration and initializing model explicitly are equivalent. When you use the configuration, AgentScope just passes the key-value pairs in the configuration to initialize the model automatically. +""" +import os + +from agentscope.models import ( + DashScopeChatWrapper, + ModelWrapperBase, + ModelResponse, +) +import agentscope + +# %% +# Using Configuration +# ------------------------------ +# In a model configuration, the following three fields are required: +# +# - config_name: The name of the configuration. +# - model_type: The type of the model API, e.g. "dashscope_chat", "openai_chat", etc. +# - model_name: The name of the model, e.g. "qwen-max", "gpt-4o", etc. 
+# +# You should load the configurations before using the model APIs by calling `agentscope.init()` as follows: + +agentscope.init( + model_configs=[ + { + "config_name": "gpt-4o_temperature-0.5", + "model_type": "openai_chat", + "model_name": "gpt-4o", + "api_key": "xxx", + "temperature": 0.5, + }, + { + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max", + }, + ], +) + +# %% +# For the other parameters, you can refer to the docstring of the corresponding model wrapper class, since the key-value pairs in the configuration are passed to it during initialization. + +# %% +# Initializing Model Explicitly +# -------------------------------- +# The available model APIs are modularized in the `agentscope.models` module. +# You can initialize a model explicitly by calling the corresponding model class. + +# print the modules under agentscope.models +for module_name in agentscope.models.__all__: + if module_name.endswith("Wrapper"): + print(module_name) + +# %% +# Taking DashScope Chat API as an example: + +model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + api_key=os.environ["DASHSCOPE_API_KEY"], + stream=False, +) + +response = model( + messages=[ + {"role": "user", "content": "Hi!"}, + ], +) + +# %% +# The `response` is an object of `agentscope.models.ModelResponse`, which contains the following fields: +# +# - text: The generated text +# - embedding: The generated embeddings +# - image_urls: Refer to generated images +# - raw: The raw response from the API +# - parsed: The parsed response, e.g. load the text into a JSON object +# - stream: A generator that yields the response text chunk by chunk, refer to section :ref:`streaming` for more details. + +print(f"Text: {response.text}") +print(f"Embedding: {response.embedding}") +print(f"Image URLs: {response.image_urls}") +print(f"Raw: {response.raw}") +print(f"Parsed: {response.parsed}") +print(f"Stream: {response.stream}") + +# %% +# .. _integrating_new_api: +# +# Integrating New LLM API +# ---------------------------- +# There are two ways to integrate a new LLM API into AgentScope.
+# +# OpenAI-Compatible APIs +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# If your model is compatible with OpenAI Python API, reusing the `OpenAIChatWrapper` class with specific parameters is recommended. +# +# .. note:: You should take care of the messages format manually. +# +# Taking `vLLM `_, an OpenAI-compatible LLM inference engine, as an example, +# its `official doc `_ provides the following example for OpenAI Python client library: +# +# .. code-block:: python +# +# from openai import OpenAI +# client = OpenAI( +# base_url="http://localhost:8000/v1", +# api_key="token-abc123", +# ) +# +# completion = client.chat.completions.create( +# model="NousResearch/Meta-Llama-3-8B-Instruct", +# messages=[ +# {"role": "user", "content": "Hello!"} +# ], +# temperature=0.5, +# ) +# +# print(completion.choices[0].message) +# +# +# It's very easy to integrate vLLM into AgentScope as follows: +# +# - put the parameters for initializing OpenAI client (except `api_key`) into `client_args`, and +# - the parameters for generating completions (except `model`) into `generate_args`. +# + +vllm_model_config = { + "model_type": "openai_chat", + "config_name": "vllm_llama2-7b-chat-hf", + "model_name": "meta-llama/Llama-2-7b-chat-hf", + "api_key": "token-abc123", # The API key + "client_args": { + "base_url": "http://localhost:8000/v1/", # Used to specify the base URL of the API + }, + "generate_args": { + "temperature": 0.5, # The generation parameters, e.g.
temperature, seed + }, +} + +# %% +# Or, directly initialize the OpenAI Chat model wrapper with the parameters: + +from agentscope.models import OpenAIChatWrapper + +model = OpenAIChatWrapper( + config_name="", + model_name="meta-llama/Llama-2-7b-chat-hf", + api_key="token-abc123", + client_args={"base_url": "http://localhost:8000/v1/"}, + generate_args={"temperature": 0.5}, +) + +# %% +# RESTful APIs +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# If your model is accessed via RESTful post API, and OpenAI-compatible in response format, consider to use the `PostAPIChatWrapper`. +# +# Taking the following curl command as an example, just extract the **header**, **API URL**, and **data** (except `messages`, which will be passed automatically) as the parameters for initializing the model wrapper. +# +# For an example post request: +# +# .. code-block:: bash +# +# curl https://api.openai.com/v1/chat/completions +# -H "Content-Type: application/json" +# -H "Authorization: Bearer $OPENAI_API_KEY" +# -d '{ +# "model": "gpt-4o", +# "messages": [ +# {"role": "user", "content": "write a haiku about ai"} +# ] +# }' +# +# The corresponding model wrapper initialization is as follows: + +from agentscope.models import PostAPIChatWrapper + +post_api_model = PostAPIChatWrapper( + config_name="", + api_url="https://api.openai.com/v1/chat/completions", # The target URL + headers={ + "Content-Type": "application/json", # From the header + "Authorization": "Bearer $OPENAI_API_KEY", + }, + json_args={ + "model": "gpt-4o", # From the data + }, +) + +# %% +# Its model configuration is as follows: + +post_api_config = { + "config_name": "{my_post_model_config_name}", + "model_type": "post_api_chat", + "api_url": "https://api.openai.com/v1/chat/completions", + "headers": { + "Authorization": "Bearer {YOUR_API_TOKEN}", + }, + "json_args": { + "model": "gpt-4o", + }, +} + +# %% +# If your model API response format is different from OpenAI API, you can inherit from `PostAPIChatWrapper` and override the 
`_parse_response` method to adapt to your API response format. +# +# .. note:: You need to define a new `model_type` field in the subclass to distinguish it from the existing model wrappers. +# +# + + +class MyNewModelWrapper(PostAPIChatWrapper): + model_type: str = "{my_new_model_type}" + + def _parse_response(self, response: dict) -> ModelResponse: + """Parse the response from the API server. + + Args: + response (`dict`): + The response obtained from API server and parsed by + `response.json()` to unify the format. + + Return (`ModelResponse`): + The parsed response. + """ + # TODO: Replace by your own parsing logic + return ModelResponse( + text=response["data"]["response"]["choices"][0]["message"][ + "content" + ], + ) + + +# %% +# From Scratch +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# If you decide to implement a new model API from scratch, you need to know the following concepts in AgentScope: +# +# - **model_type**: When using model configurations, AgentScope uses the `model_type` field to distinguish different model APIs. So ensure your new model wrapper class has a unique `model_type`. +# - **__init__**: When initializing from configuration, AgentScope passes all the key-value pairs in the configuration to the `__init__` method of the model wrapper. So ensure your `__init__` method can handle all the parameters in the configuration. +# - **__call__**: The core method of the model wrapper is `__call__`, which takes the input messages and returns the response. Its return value should be an object of `ModelResponse`. + + +class MyNewModelWrapper(ModelWrapperBase): + model_type: str = "{my_new_model_type}" + + def __init__(self, config_name, model_name, **kwargs) -> None: + super().__init__(config_name, model_name=model_name) + + # TODO: Initialize your model here + + def __call__(self, *args, **kwargs) -> ModelResponse: + # TODO: Implement the core logic of your model here + + return ModelResponse( + text="Hello, World!", + ) + + +# %% +# .. 
tip:: Optionally, you can implement a format method to format the prompt before sending it to the model API. +# Refer to :ref:`prompt-engineering` for more details. +# +# Further Reading +# --------------------- +# - :ref:`prompt-engineering` +# - :ref:`streaming` +# - :ref:`structured-output` diff --git a/docs/tutorial/en/source/tutorial/monitor.py b/docs/tutorial/en/source/tutorial/monitor.py new file mode 100644 index 000000000..54a1cd9d8 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/monitor.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +""" +.. _configuring_and_monitoring: + +Configuring and Monitoring +================================== + +The main entry of AgentScope is `agentscope.init`, where you can configure your application. +""" + +import agentscope + + +agentscope.init( + model_configs=[ + { # The model configurations + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max", + }, + ], + project="Project Alpha", # The project name + name="Test-1", # The runtime name + disable_saving=False, # The main switch to disable saving + save_dir="./runs", # The saving directory + save_log=True, # Save the logging or not + save_code=False, # Save the code for this runtime + save_api_invoke=False, # Save the API invocation + cache_dir="~/.cache", # The cache directory, used for caching embeddings and so on + use_monitor=True, # Monitor the token usage or not + logger_level="INFO", # The logger level +) + +# %% +# Exporting the configuration +# -------------------------------- +# The `state_dict` method can be used to export the configuration of the running application. + +import json + +print(json.dumps(agentscope.state_dict(), indent=2)) + +# %% +# Monitoring the Runtime +# -------------------------- +# AgentScope provides AgentScope Studio, a web visual interface to monitor and manage the running applications and histories. +# Refer to section :ref:`visual` for more details. + +# %% +# .. 
_token_usage: +# +# Monitoring Token Usage +# ------------------------ +# `print_llm_usage` will print and return the token usage of the current running application. + +from agentscope.models import DashScopeChatWrapper + +qwen_max = DashScopeChatWrapper( + config_name="-", + model_name="qwen-max", +) +qwen_plus = DashScopeChatWrapper( + config_name="-", + model_name="qwen-plus", +) + +# Call qwen-max and qwen-plus to simulate the token usage +_ = qwen_max([{"role": "user", "content": "Hi!"}]) +_ = qwen_plus([{"role": "user", "content": "Who are you?"}]) + +usage = agentscope.print_llm_usage() + +print(json.dumps(usage, indent=2)) diff --git a/docs/tutorial/en/source/tutorial/multimodality.py b/docs/tutorial/en/source/tutorial/multimodality.py new file mode 100644 index 000000000..617c15f61 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/multimodality.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +""" +.. _multimodality: + +MultiModality +============================ + +In this section, we will show how to build multimodal applications in AgentScope with two examples. + +- The first example demonstrates how to use vision LLMs within an agent, and +- the second example shows how to use text to image generation within an agent. + +Building Vision Agent +------------------------------ + +For most LLM APIs, the vision and non-vision LLMs share the same APIs, and only differ in the input format. +In AgentScope, the `format` function of the model wrapper is responsible for converting the input `Msg` objects into the required format for vision LLMs. + +That is, we only need to specify the vision LLM without changing the agent's code. +Taking "qwen-vl-max" as an example, its model configuration is the same as the non-vision LLMs in DashScope Chat API. + +Refer to section :ref:`model_api` for the vision LLM APIs supported in AgentScope. 
+""" + +model_config = { + "config_name": "my-qwen-vl", + "model_type": "dashscope_multimodal", + "model_name": "qwen-vl-max", +} + +# %% +# +# As usual, we initialize AgentScope with the above configuration, and create a new agent with the vision LLM. + +from agentscope.agents import DialogAgent +import agentscope + +agentscope.init(model_configs=model_config) + +agent = DialogAgent( + name="Monday", + sys_prompt="You're a helpful assistant named Monday.", + model_config_name="my-qwen-vl", +) + +# %% +# To communicate with the vision agent with pictures, `Msg` class provides an `url` field. +# You can put both local or online image URL(s) in the `url` field. +# +# Let's first create an image with matplotlib + +import matplotlib.pyplot as plt + +plt.figure(figsize=(6, 6)) +plt.bar(range(3), [2, 1, 4]) +plt.xticks(range(3), ["Alice", "Bob", "Charlie"]) +plt.title("The Apples Each Person Has in 2023") +plt.xlabel("Number of Apples") + +plt.show() +plt.savefig("./bar.png") + +# %% +# Then, we create a `Msg` object with the image URL + +from agentscope.message import Msg + +msg = Msg( + name="User", + content="Describe the attached image for me.", + role="user", + url="./bar.png", +) + +# %% +# After that, we can send the message to the vision agent and get the response. + +response = agent(msg) diff --git a/docs/tutorial/en/source/tutorial/prompt.py b/docs/tutorial/en/source/tutorial/prompt.py new file mode 100644 index 000000000..604dc6d6b --- /dev/null +++ b/docs/tutorial/en/source/tutorial/prompt.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +""" +.. _prompt-engineering: + +Prompt Engineering +================================ + +The prompt engineering is a crucial part of LLM-empowered applications, +especially for the multi-agent ones. +However, most API providers focus on the chatting scenario, where a user and +an assistant speak alternately. 
+ +To support multi-agent applications, AgentScope builds different prompt +strategies to convert a list of `Msg` objects to the required format. + +.. note:: There is no **one-size-fits-all** solution for prompt crafting. + The goal of built-in strategies is to **enable beginners to smoothly invoke + the model API, rather than achieve the best performance**. + For advanced users, we highly recommend developers to customize prompts + according to their needs and model API requirements. + +Using Built-in Strategy +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The built-in prompt strategies are implemented in the `format` method of the +model objects. Taking DashScope Chat API as an example: + +""" + +from agentscope.models import DashScopeChatWrapper +from agentscope.message import Msg +import json + + +model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", +) + +# `Msg` objects or a list of `Msg` objects can be passed to the `format` method +prompt = model.format( + Msg("system", "You're a helpful assistant.", "system"), + [ + Msg("assistant", "Hi!", "assistant"), + Msg("user", "Nice to meet you!", "user"), + ], +) + +print(json.dumps(prompt, indent=4, ensure_ascii=False)) + +# %% +# After formatting the input messages, we can input the prompt into the model +# object. + +response = model(prompt) + +print(response.text) + +# %% +# Non-Vision Models +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# In the following table, we list the built-in prompt strategies, as well as +# the prefix of supported LLMs. +# +# Taking the following messages as an example: +# +# .. code-block:: python +# +# Msg("system", "You're a helpful assistant named Alice.", "system"), +# Msg("Alice", "Hi!", "assistant"), +# Msg("Bob", "Nice to meet you!", "user") +# +# +# .. list-table:: +# :header-rows: 1 +# +# * - LLMs +# - `model_name` +# - Constructed Prompt +# * - OpenAI LLMs +# - `gpt-` +# - .. 
code-block:: python +# +# [ +# { +# "role": "system", +# "name": "system", +# "content": "You're a helpful assistant named Alice." +# }, +# { +# "role": "user", +# "name": "Alice", +# "content": "Hi!" +# }, +# { +# "role": "user", +# "name": "Bob", +# "content": "Nice to meet you!" +# } +# ] +# * - Gemini LLMs +# - `gemini-` +# - .. code-block:: python +# +# [ +# { +# "role": "user", +# "parts": [ +# "You're a helpful assistant named Alice.\\n## Conversation History\\nAlice: Hi!\\nBob: Nice to meet you!" +# ] +# } +# ] +# * - All other LLMs +# +# (e.g. DashScope, ZhipuAI ...) +# - +# - .. code-block:: python +# +# [ +# { +# "role": "system", +# "content": "You're a helpful assistant named Alice." +# }, +# { +# "role": "user", +# "content": "## Conversation History\\nAlice: Hi!\\nBob: Nice to meet you!" +# } +# ] +# +# .. tip:: Considering some API libraries can support different LLMs (such as OpenAI Python library), AgentScope uses the `model_name` field to distinguish different models and decides the used strategy. +# +# Vision Models +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# For vision models, AgentScope currently supports OpenAI vision models and +# the DashScope multimodal API. +# More supported APIs will be added in the future. diff --git a/docs/tutorial/en/source/tutorial/prompt_optimization.py b/docs/tutorial/en/source/tutorial/prompt_optimization.py new file mode 100644 index 000000000..843718e0d --- /dev/null +++ b/docs/tutorial/en/source/tutorial/prompt_optimization.py @@ -0,0 +1,122 @@ +# -*- coding: utf-8 -*- +""" +.. _system-prompt-optimization: + +System Prompt Optimization +============================ + +AgentScope implements a module for optimizing Agent System Prompts. + +..
_system-prompt-generator: + +System Prompt Generator +^^^^^^^^^^^^^^^^^^^^^^^^ + +The system prompt generator uses a meta prompt to guide the LLM to generate +the system prompt according to the user's requirements, and allow the +developers to use built-in examples or provide their own examples as In +Context Learning (ICL). + +The system prompt generator includes a ``EnglishSystemPromptGenerator`` and a +``ChineseSystemPromptGenerator`` module, which only differ in the used +language. + +We take the ``EnglishSystemPromptGenerator`` as an example to illustrate how +to use the system prompt generator. + +Initialization +^^^^^^^^^^^^^^^^^^^^^^^^ + +To initialize the generator, you need to first register your model +configurations in the ``agentscope.init`` function. +""" + +from agentscope.prompt import EnglishSystemPromptGenerator +import agentscope + +model_config = { + "model_type": "dashscope_chat", + "config_name": "qwen_config", + "model_name": "qwen-max", + # export your api key via environment variable +} + +# %% +# The generator will use a built-in default meta prompt to guide the LLM to +# generate the system prompt. + + +agentscope.init( + model_configs=model_config, +) + +prompt_generator = EnglishSystemPromptGenerator( + model_config_name="qwen_config", +) + + +# %% +# Users are welcome to freely try different optimization methods. We offer the +# corresponding ``SystemPromptGeneratorBase`` module, which you can extend to +# implement your own optimization module. +# +# Generation +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Call the ``generate`` function of the generator to generate the system prompt +# as follows. +# +# You can input a requirement, or your system prompt to be optimized. 
+ +generated_system_prompt = prompt_generator.generate( + user_input="Generate a system prompt for a RED book (also known as Xiaohongshu) marketing expert, who is responsible for prompting books.", +) + +print(generated_system_prompt) + +# %% +# Generation with In Context Learning +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# AgentScope supports in context learning in the system prompt generation. +# +# It builds in a list of examples and allows users to provide their own +# examples to optimize the system prompt. +# +# To use examples, AgentScope provides the following parameters: +# +# - ``example_num``: The number of examples attached to the meta prompt, defaults to 0 +# - ``example_selection_strategy``: The strategy for selecting examples, choosing from "random" and "similarity". +# - ``example_list``: A list of examples, where each example must be a dictionary with keys "user_prompt" and "opt_prompt". If not specified, the built-in example list will be used. +# +# Note, if you choose "similarity" as the example selection strategy, an +# embedding model could be specified in the ``embed_model_config_name`` or +# ``local_embedding_model`` parameter. +# +# Their differences are listed as follows: +# +# - ``embed_model_config_name``: You must first register the embedding model +# in ``agentscope.init`` and specify the model configuration name in this +# parameter. +# - ``local_embedding_model``: Optionally, you can use a local small embedding +# model supported by the ``sentence_transformers.SentenceTransformer`` library. +# +# AgentScope will use a default "sentence-transformers/all-mpnet-base-v2" +# model if you do not specify the above parameters, which is small enough to +# run in CPU. 
+ +icl_generator = EnglishSystemPromptGenerator( + model_config_name="qwen_config", + example_num=3, + example_selection_strategy="random", +) + +icl_generated_system_prompt = icl_generator.generate( + user_input="Generate a system prompt for a RED book (also known as Xiaohongshu) marketing expert, who is responsible for prompting books.", +) + +print(icl_generated_system_prompt) + +# %% +# .. note:: 1. The example embeddings will be cached in ``~/.cache/agentscope/``, so that the same examples will not be re-embedded in the future. +# 2. For your information, the number of built-in examples for ``EnglishSystemPromptGenerator`` and ``ChineseSystemPromptGenerator`` is 18 and 37, respectively. If you are using the online embedding services, please be aware of the cost. diff --git a/docs/tutorial/en/source/tutorial/quickstart.py b/docs/tutorial/en/source/tutorial/quickstart.py new file mode 100644 index 000000000..92b3a6f6c --- /dev/null +++ b/docs/tutorial/en/source/tutorial/quickstart.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +""" +.. _quickstart: + +Quick Start +============================ + +AgentScope requires Python 3.9 or higher. You can install from source or PyPI. + +From PyPI +---------------- +.. code-block:: bash + + pip install agentscope + +From Source +---------------- +To install AgentScope from source, you need to clone the repository from +GitHub and install by the following commands: + +.. code-block:: bash + + git clone https://github.com/modelscope/agentscope + cd agentscope + pip install -e . + +To ensure AgentScope is installed properly, you can execute the following code: +""" + +import agentscope + +print(agentscope.__version__) + +# %% +# Extra Dependencies +# ---------------------------- +# +# AgentScope provides extra dependencies for different demands. You can +# install them according to your demands.
+# +# - ollama: Ollama API +# - litellm: Litellm API +# - zhipuai: Zhipuai API +# - gemini: Gemini API +# - anthropic: Anthropic API +# - service: The dependencies for different tool functions +# - distribute: The dependencies for distribution mode +# - full: All the dependencies +# +# Taking distribution mode as an example, the installation command differs +# according to your operation OS. +# +# For Windows users: +# +# .. code-block:: bash +# +# pip install agentscope[gemini] +# # or +# pip install agentscope[ollama,distribute] +# +# For Mac and Linux users: +# +# .. code-block:: bash +# +# pip install agentscope\[gemini\] +# # or +# pip install agentscope\[ollama,distribute\] diff --git a/docs/sphinx_doc/en/source/tutorial/210-rag.md b/docs/tutorial/en/source/tutorial/rag.md similarity index 76% rename from docs/sphinx_doc/en/source/tutorial/210-rag.md rename to docs/tutorial/en/source/tutorial/rag.md index 39c3ecce0..cd53fff9e 100644 --- a/docs/sphinx_doc/en/source/tutorial/210-rag.md +++ b/docs/tutorial/en/source/tutorial/rag.md @@ -1,72 +1,70 @@ -(210-rag-en)= - -# A Quick Introduction to RAG in AgentScope +# RAG We want to introduce three concepts related to RAG in AgentScope: Knowledge, KnowledgeBank and RAG agent. -### Knowledge The Knowledge modules (now only `LlamaIndexKnowledge`; support for LangChain will come soon) are responsible for handling all RAG-related operations. -#### How to create a Knowledge object - A Knowledge object can be created with a JSON configuration to specify 1) data path, 2) data loader, 3) data preprocessing methods, and 4) embedding model (model config name). - A detailed example can refer to the following: -
- A detailed example of Knowledge object configuration - - ```json - [ +## Creating Knowledge +A Knowledge object can be created with a JSON configuration to specify 1) data path, 2) data loader, 3) data preprocessing methods, and 4) embedding model (model config name). +A detailed example can refer to the following: +
+ A detailed example of Knowledge object configuration + +```json +[ +{ +"knowledge_id": "{your_knowledge_id}", +"emb_model_config_name": "{your_embed_model_config_name}", +"data_processing": [ { - "knowledge_id": "{your_knowledge_id}", - "emb_model_config_name": "{your_embed_model_config_name}", - "data_processing": [ - { - "load_data": { - "loader": { - "create_object": true, - "module": "llama_index.core", - "class": "SimpleDirectoryReader", - "init_args": { - "input_dir": "{path_to_your_data_dir_1}", - "required_exts": [".md"] - } - } + "load_data": { + "loader": { + "create_object": true, + "module": "llama_index.core", + "class": "SimpleDirectoryReader", + "init_args": { + "input_dir": "{path_to_your_data_dir_1}", + "required_exts": [".md"] } - }, - { - "load_data": { - "loader": { - "create_object": true, - "module": "llama_index.core", - "class": "SimpleDirectoryReader", - "init_args": { - "input_dir": "{path_to_your_python_code_data_dir}", - "recursive": true, - "required_exts": [".py"] - } - } - }, - "store_and_index": { - "transformations": [ - { - "create_object": true, - "module": "llama_index.core.node_parser", - "class": "CodeSplitter", - "init_args": { - "language": "python", - "chunk_lines": 100 - } - } - ] + } + } + }, + { + "load_data": { + "loader": { + "create_object": true, + "module": "llama_index.core", + "class": "SimpleDirectoryReader", + "init_args": { + "input_dir": "{path_to_your_python_code_data_dir}", + "recursive": true, + "required_exts": [".py"] } } - ] + }, + "store_and_index": { + "transformations": [ + { + "create_object": true, + "module": "llama_index.core.node_parser", + "class": "CodeSplitter", + "init_args": { + "language": "python", + "chunk_lines": 100 + } + } + ] + } } - ] - ``` +] +} +] +``` -
+
+ +### Configuring Knowledge -#### More about knowledge configurations The aforementioned configuration is usually saved as a JSON file, it musts contain the following key attributes, * `knowledge_id`: a unique identifier of the knowledge; @@ -75,7 +73,8 @@ contain the following key attributes, * `chunk_overlap`: default chunk overlap for each chunk (node); * `data_processing`: a list of data processing methods. -##### Using LlamaIndexKnowledge as an example + +#### Using LlamaIndexKnowledge Regarding the last attribute `data_processing`, each entry of the list (which is a dict) configures a data loader object that loads the needed data (i.e. `load_data`), @@ -107,7 +106,7 @@ For more information about the node parsers, please refer to [here](https://docs If users want to avoid the detailed configuration, we also provide a quick way in `KnowledgeBank` (see the following). -#### How to use a Knowledge object +### How to use a Knowledge object After a knowledge object is created successfully, users can retrieve information related to their queries by calling `.retrieve(...)` function. The `.retrieve` function accepts at least three basic parameters: * `query`: input that will be matched in the knowledge; @@ -116,7 +115,7 @@ The `.retrieve` function accepts at least three basic parameters: *Advanaced:* In `LlamaIndexKnowledge`, it also supports users passing their own retriever to retrieve from knowledge. -#### More details inside `LlamaIndexKnowledge` +### More details inside `LlamaIndexKnowledge` Here, we will use `LlamaIndexKnowledge` as an example to illustrate the operation within the `Knowledge` module. 
When a `LlamaIndexKnowledge` object is initialized, the `LlamaIndexKnowledge.__init__` will go through the following steps: * It processes data and prepare for retrieval in `LlamaIndexKnowledge._data_to_index(...)`, which includes @@ -126,39 +125,36 @@ When a `LlamaIndexKnowledge` object is initialized, the `LlamaIndexKnowledge.__i * If the indexing already exists, then `LlamaIndexKnowledge._load_index(...)` will be invoked to load the index and avoid repeating embedding calls.
-### Knowledge Bank +## Knowledge Bank The knowledge bank maintains a collection of Knowledge objects (e.g., on different datasets) as a set of *knowledge*. Thus, different agents can reuse the Knowledge object without unnecessary "re-initialization". Considering that configuring the Knowledge object may be too complicated for most users, the knowledge bank also provides an easy function call to create Knowledge objects. - * `KnowledgeBank.add_data_as_knowledge`: create Knowledge object. An easy way only requires to provide `knowledge_id`, `emb_model_name` and `data_dirs_and_types`. - As knowledge bank process files as `LlamaIndexKnowledge` by default, all text file types are supported, such as `.txt`, `.html`, `.md`, `.csv`, `.pdf` and all code file like `.py`. File types other than the text can refer to [LlamaIndex document](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/). - ```python - knowledge_bank.add_data_as_knowledge( - knowledge_id="agentscope_tutorial_rag", - emb_model_name="qwen_emb_config", - data_dirs_and_types={ - "../../docs/sphinx_doc/en/source/tutorial": [".md"], - }, - ) - ``` - More advance initialization, users can still pass a knowledge config as a parameter `knowledge_config`: - ```python - # load knowledge_config as dict - knowledge_bank.add_data_as_knowledge( - knowledge_id=knowledge_config["knowledge_id"], - emb_model_name=knowledge_config["emb_model_config_name"], - knowledge_config=knowledge_config, - ) - ``` +* `KnowledgeBank.add_data_as_knowledge`: create Knowledge object. An easy way only requires to provide `knowledge_id`, `emb_model_name` and `data_dirs_and_types`. +As knowledge bank process files as `LlamaIndexKnowledge` by default, all text file types are supported, such as `.txt`, `.html`, `.md`, `.csv`, `.pdf` and all code file like `.py`. File types other than the text can refer to [LlamaIndex document](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/). 
+```python +knowledge_bank.add_data_as_knowledge( + knowledge_id="agentscope_tutorial_rag", + emb_model_name="qwen_emb_config", + data_dirs_and_types={ + "../../docs/sphinx_doc/en/source/tutorial": [".md"], + }, +) +``` +More advance initialization, users can still pass a knowledge config as a parameter `knowledge_config`: +```python +# load knowledge_config as dict +knowledge_bank.add_data_as_knowledge( + knowledge_id=knowledge_config["knowledge_id"], + emb_model_name=knowledge_config["emb_model_config_name"], + knowledge_config=knowledge_config, +) +``` * `KnowledgeBank.get_knowledge`: It accepts two parameters, `knowledge_id` and `duplicate`. It will return a knowledge object with the provided `knowledge_id`; if `duplicate` is true, the return will be deep copied. * `KnowledgeBank.equip`: It accepts three parameters, `agent`, `knowledge_id_list` and `duplicate`. The function will provide knowledge objects according to the `knowledge_id_list` and put them into `agent.knowledge_list`. If `duplicate` is true, the assigned knowledge object will be deep copied first. - - - -### RAG agent +## RAG agent RAG agent is an agent that can generate answers based on the retrieved knowledge. * Agent using RAG: a RAG agent has a list of knowledge objects (`knowledge_list`). * RAG agent can be initialized with a `knowledge_list` @@ -295,8 +291,3 @@ model = PostAPIEmbeddingWrapper( print(model("testing")) ``` - -[[Back to the top]](#210-rag-en) - - - diff --git a/docs/tutorial/en/source/tutorial/streaming.py b/docs/tutorial/en/source/tutorial/streaming.py new file mode 100644 index 000000000..8d789da54 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/streaming.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +""" +.. _streaming: + +Streaming Mode +========================= + +AgentScope supports streaming output for the following APIs in both terminal +and AgentScope Studio. + +.. 
list-table:: + :header-rows: 1 + + * - API + - Class + - Streaming + * - OpenAI Chat API + - `OpenAIChatWrapper` + - ✓ + * - DashScope Chat API + - `DashScopeChatWrapper` + - ✓ + * - Gemini Chat API + - `GeminiChatWrapper` + - ✓ + * - ZhipuAI Chat API + - `ZhipuAIChatWrapper` + - ✓ + * - Ollama Chat API + - `OllamaChatWrapper` + - ✓ + * - LiteLLM Chat API + - `LiteLLMChatWrapper` + - ✓ + * - Anthropic Chat API + - `AnthropicChatWrapper` + - ✓ + +This section will show how to enable streaming mode in AgentScope and handle +the streaming response within an agent. +""" + +# %% +# Enabling Streaming Output +# ---------------------------- +# +# AgentScope supports streaming output by providing a `stream` parameter +# in model wrapper class. +# You can directly specify the `stream` parameter in initialization or +# configuration. +# +# - Specifying in Initialization + +from agentscope.models import DashScopeChatWrapper +import os + +model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + api_key=os.environ["DASHSCOPE_API_KEY"], + stream=True, # Enabling the streaming output +) + +# %% +# - Specifying in Configuration + +model_config = { + "model_type": "dashscope_chat", + "config_name": "qwen_config", + "model_name": "qwen-max", + "stream": True, +} + +# %% +# With the above configuration, we can obtain streaming output with built-in +# agents in AgentScope. +# +# Next, we show how to handle the streaming output within an agent. + +# %% +# Handling Streaming Response +# ------------------------------------------- +# +# Once we enable the streaming output, the returned model response will +# contain a generator in its `stream` field. + +prompt = [{"role": "user", "content": "Hi!"}] + +response = model(prompt) +print("The type of response.stream:", type(response.stream)) + +# %% +# We can iterate over the generator to get the streaming text. +# A boolean value will also be yielded to indicate whether the current +# chunk is the last one. 
+ +for index, chunk in enumerate(response.stream): + print(f"{index}.", chunk) + print(f"Current text field:", response.text) + +# %% +# .. note:: Note the generator is incremental and one-time. +# +# During the iterating, the `text` field in the response will concatenate +# sub strings automatically. +# +# To be compatible with non-streaming mode, you can also directly use +# `response.text` to obtain all text at once. + +prompt = [{"role": "user", "content": "Hi!"}] +response = model(prompt) +print(response.text) + +# %% +# Displaying Like Typewriter +# ------------------------------------------- +# To display the streaming text like a typewriter, AgentScope provides a +# `speak` function within the `AgentBase` class. +# If a generator is given, the `speak` function will iterate over the +# generator and print the text like a typewriter in terminal or AgentScope +# Studio. +# +# .. code-block:: python +# +# def reply(*args, **kwargs): +# # ... +# self.speak(response.stream) +# # ... +# +# To be compatible with both streaming and non-streaming mode, we use the +# following code snippet for all built-in agents in AgentScope. +# +# .. code-block:: python +# +# def reply(*args, **kwargs): +# # ... +# self.speak(response.stream or response.text) +# # ... +# diff --git a/docs/tutorial/en/source/tutorial/structured_output.py b/docs/tutorial/en/source/tutorial/structured_output.py new file mode 100644 index 000000000..922201b7f --- /dev/null +++ b/docs/tutorial/en/source/tutorial/structured_output.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- +""" +.. _structured-output: + +Structured Output +========================== + +In this tutorial, we will be building a simple agent that outputs structured +data in JSON dictionary format using the `agentscope.parsers` module. 
+""" +from agentscope.models import ModelResponse + +# %% +# Defining the Parser +# ------------------- + +from agentscope.parsers import MarkdownJsonDictParser + + +parser = MarkdownJsonDictParser( + content_hint='{"thought": "What you thought", "speak": "What you speak to the user"}', + required_keys=["thought", "speak"], +) + + +# %% +# The parser will generate a format instruction according to your input. You +# can use the `format_instruction` property to in your prompt to guide LLM to +# generate the desired output. + +print(parser.format_instruction) + +# %% +# Parsing the Output +# ------------------- +# When receiving output from LLM, use `parse` method to extract the +# structured data. +# It takes an object of `agentscope.models.ModelResponse` as input, parses +# the value of the `text` field, and returns a parsed dictionary in the +# `parsed` field. + +dummy_response = ModelResponse( + text="""```json +{ + "thought": "I should greet the user", + "speak": "Hi! How can I help you?" +} +```""", +) + +print(f"parsed field before parsing: {dummy_response.parsed}") + +parsed_response = parser.parse(dummy_response) + +print(f"parsed field after parsing: {parsed_response.parsed}") +print(type(parsed_response.parsed)) + +# %% +# Error Handling +# ------------------- +# If the LLM output does not match the expected format, the parser will raise +# an error with a detailed message. +# So developers can present the error message to LLM to guide it to correct +# the output. + +error_response = ModelResponse( + text="""```json +{ + "thought": "I should greet the user" +} +```""", +) + +try: + parsed_response = parser.parse(error_response) +except Exception as e: + print(e) + +# %% +# Advanced Usage +# ------------------- +# More Complex Content +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Asking LLM to directly generate a JSON dictionary can be challenging, +# especially when the JSON content is complex (e.g. code snippets, nested +# structures). 
+# In this case, you can use more advanced parsers to guide LLM to generate +# the desired output. +# Here is an example of a more complex parser that handles code snippets. + +from agentscope.parsers import RegexTaggedContentParser + +parser = RegexTaggedContentParser( + format_instruction="""Response in the following format: +<thought>what you thought</thought> +<number>A random number here</number> +<code>your python code here</code> +""", + try_parse_json=True, # Try to parse each value as a JSON object + required_keys=[ + "thought", + "number", + "code", + ], # Required keys in the parsed dictionary +) + +print(parser.format_instruction) + +# %% +# The `RegexTaggedContentParser` uses regular expressions to match the tagged +# content in the text and return the parsed dictionary. +# +# .. note:: The parsed output of `RegexTaggedContentParser` is a dictionary, which means the required keys should be unique. +# You can also change the regular expression pattern by setting the `tagged_content_pattern` parameter when initializing the parser. + +import json + +dummy_response = ModelResponse( + text="""<thought>Print the current date</thought> +<number>42</number> +<code>import datetime +print(datetime.datetime.now()) +</code> +""", +) + +parsed_response = parser.parse(dummy_response) + +print("The type of parsed response: ", type(parsed_response.parsed)) +print("The type of the number: ", type(parsed_response.parsed["number"])) +print(json.dumps(parsed_response.parsed, indent=4)) + +# %% +# Auto Post-Processing +# ^^^^^^^^^^^^^^^^^^^^ +# +# Within the parsed dictionary, different keys may require different +# post-processing steps. +# For example, in a werewolf game, the LLM is playing the role of a seer, and +# the output should contain the following keys: +# +# - `thought`: The seer's thoughts +# - `speak`: The seer's speech +# - `use_ability`: A boolean value indicating whether the seer should use its ability +# +# In this case, the `thought` and `speak` contents should be stored in the +# agent's memory to ensure the consistency of the agent's behavior. 
+# The `speak` content should be spoken out to the user. +# The `use_ability` key should be accessed outside the agent easily to +# determine the game flow. +# +# AgentScope supports automatic post-processing of the parsed dictionary by +# providing the following parameters when initializing the parser. +# +# - `keys_to_memory`: key(s) that should be stored in the agent's memory +# - `keys_to_content`: key(s) that should be spoken out +# - `keys_to_metadata`: key(s) that should be stored in the metadata field of the agent's response message +# +# .. note:: If a string is provided, the parser will extract the value of the given key from the parsed dictionary. If a list of strings is provided, a sub-dictionary will be created with the given keys. +# +# Here is an example of using the `MarkdownJsonDictParser` to automatically +# post-process the parsed dictionary. + +parser = MarkdownJsonDictParser( + content_hint='{"thought": "what you thought", "speak": "what you speak", "use_ability": "whether to use the ability"}', + keys_to_memory=["thought", "speak"], + keys_to_content="speak", + keys_to_metadata="use_ability", +) + +dummy_response = ModelResponse( + text="""```json +{ + "thought": "I should ...", + "speak": "I will not use my ability", + "use_ability": false +}``` +""", +) + +parsed_response = parser.parse(dummy_response) + +print("The parsed response: ", parsed_response.parsed) +print("To memory", parser.to_memory(parsed_response.parsed)) +print("To message content: ", parser.to_content(parsed_response.parsed)) +print("To message metadata: ", parser.to_metadata(parsed_response.parsed)) + +# %% +# Here we show how to create an agent that can automatically post-process the +# parsed dictionary by the following core steps in the `reply` method. +# +# 1. Put the format instruction in prompt to guide LLM to generate the desired output +# 2. Parse the LLM response +# 3. 
Post-process the parsed dictionary by using the `to_memory`, `to_content`, and `to_metadata` methods +# +# .. tip:: By changing different parsers, the agent can adapt to different scenarios and generate structured output in various formats. + +from agentscope.models import DashScopeChatWrapper +from agentscope.agents import AgentBase +from agentscope.message import Msg + + +class Agent(AgentBase): + def __init__(self): + self.name = "Alice" + super().__init__(name=self.name) + + self.sys_prompt = f"You're a helpful assistant named {self.name}." + + self.model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + ) + + self.parser = MarkdownJsonDictParser( + content_hint='{"thought": "what you thought", "speak": "what you speak", "use_ability": "whether to use the ability"}', + keys_to_memory=["thought", "speak"], + keys_to_content="speak", + keys_to_metadata="use_ability", + ) + + self.memory.add(Msg("system", self.sys_prompt, "system")) + + def reply(self, msg): + self.memory.add(msg) + + prompt = self.model.format( + self.memory.get_memory(), + # Instruct the model to respond in the required format + Msg("system", self.parser.format_instruction, "system"), + ) + + response = self.model(prompt) + + parsed_response = self.parser.parse(response) + + self.memory.add( + Msg( + name=self.name, + content=self.parser.to_memory(parsed_response.parsed), + role="assistant", + ), + ) + + return Msg( + name=self.name, + content=self.parser.to_content(parsed_response.parsed), + role="assistant", + metadata=self.parser.to_metadata(parsed_response.parsed), + ) diff --git a/docs/tutorial/en/source/tutorial/tool.py b/docs/tutorial/en/source/tutorial/tool.py new file mode 100644 index 000000000..49ca2ec69 --- /dev/null +++ b/docs/tutorial/en/source/tutorial/tool.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +""" +.. _tools: + +Tools +==================== + +In this tutorial we show how to use the built-in tools in AgentScope and +how to create custom tools. 
+""" +import json + +import agentscope +from agentscope.message import Msg + +# %% +# Using Built-in Tools +# -------------------------- +# AgentScope provides a `ServiceToolkit` module that supports +# +# - tool introduction generation, +# - a default call format, +# - response parsing, tools calling and agent-oriented error handling. +# +# Before using `ServiceToolkit`, we can take a look at the available tools in +# the `agentscope.service` module. + +from agentscope.service import get_help, ServiceResponse, ServiceExecStatus + +get_help() + +# %% +# All above functions are implemented as Python functions. +# They can be registered to the `ServiceToolkit` by calling the `add` method. + +from agentscope.service import ServiceToolkit +from agentscope.service import bing_search, execute_shell_command + +toolkit = ServiceToolkit() +toolkit.add(execute_shell_command) + +# Note some parameters of the tool functions (e.g. api_key) should be handled +# by developers. +# You can directly pass these parameters as keyword arguments in the add +# method as follows, the reserved parameters will be left to the agent to fill. + +toolkit.add(bing_search, api_key="xxx") + +print("The tools instruction:") +print(toolkit.tools_instruction) + +# %% +# The built-in default calling format: + +print(toolkit.tools_calling_format) + +# %% +# The JSON Schema description of the tool functions: + +print(json.dumps(toolkit.json_schemas, indent=2)) + + +# %% + +# %% +# After assembling the `ServiceToolkit`, you can integrate it into agent. +# In AgentScope, we provide a `ReActAgent` to handle the tool usage, you can +# directly pass the `ServiceToolkit` object into this agent. +# Refer to [] for implementation details of this agent. 
+ +from agentscope.agents import ReActAgent + +agentscope.init( + model_configs={ + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max", + }, +) + +agent = ReActAgent( + name="Friday", + model_config_name="my-qwen-max", + service_toolkit=toolkit, + sys_prompt="You're a helpful assistant named Friday.", +) + +msg_task = Msg("user", "Help me to calculate 1615114134*4343434343", "user") + +res = agent(msg_task) + + +# %% +# Creating Custom Tools +# -------------------------- +# A custom tool function must follow these rules: +# +# - Typing for arguments +# - Well-written docstring in Google style +# - The return of the function must be wrapped by `ServiceResponse` + + +def new_function(arg1: str, arg2: int) -> ServiceResponse: + """A brief introduction of this function in one line. + + Args: + arg1 (`str`): + Brief description of arg1 + arg2 (`int`): + Brief description of arg2 + """ + return ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content="Done!", + ) diff --git a/docs/tutorial/en/source/tutorial/visual.py b/docs/tutorial/en/source/tutorial/visual.py new file mode 100644 index 000000000..122776d8d --- /dev/null +++ b/docs/tutorial/en/source/tutorial/visual.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: sphinx +# format_version: '1.1' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +""" +.. _visual-interface: + +Visual Interface +========================= + +AgentScope supports various visual interfaces for better user experience, +including Gradio and AgentScope Studio. + +Gradio +~~~~~~~~~~~~~~~~~~~~~~ + +First ensure you have installed the full version of AgentScope, which +includes the Gradio package. + +.. 
code-block:: bash + + # From pypi + pip install agentscope[full] + + # From source code + cd agentscope + pip install .[full] + + +After that, ensure your application is wrapped by a `main` function. + +.. code-block:: python + + from agentscope.agents import DialogAgent, UserAgent + import agentscope + + + def main(): + # Your code here + agentscope.init(model_configs={ + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max" + }) + + agent = DialogAgent( + name="Alice", + model_config_name="my-qwen-max", + sys_prompt="You're a helpful assistant named Alice." + ) + user = UserAgent(agent) + + msg = None + while True: + msg = agent(msg) + msg = user(msg) + if msg.content == "exit": + break + + +Then execute the following command in the terminal to start the Gradio UI: + +.. code-block :: bash + + as_gradio {path_to_your_python_code} + +Finally, you can visit the Gradio UI as follows: + +.. image:: https://img.alicdn.com/imgextra/i1/O1CN0181KSfH1oNbfzjUAVT_!!6000000005213-0-tps-3022-1530.jpg + :align: center + :class: bordered-image + +------------------------------ + +AgentScope Studio +~~~~~~~~~~~~~~~~~~ + +AgentScope Studio is an open-source Web UI toolkit for building and +monitoring multi-agent applications. It provides the following features: + +* **Dashboard**: A user-friendly interface, where you can monitor your running applications, and look through the running histories. + +* **Workstation**: A powerful interface to build your multi-agent applications with Dragging & Dropping. + +* **Server Manager**: An easy-to-use monitoring and management tool for managing large-scale distributed applications. + +* **Gallery**: Fruitful applications and demos in Workstation. (Coming soon!) + +For details about Workstation and Gallery, please refer to :ref:`Low-code +Developments `. +For details about Server Manager, please refer to :ref:`Distribution +`. + + +.. 
_studio: + +Start AgentScope Studio +---------------------------- + +To start a studio, first ensure you have installed the latest version of +AgentScope. Then, you can simply run the following Python code: + +.. code-block:: python + + import agentscope + agentscope.studio.init() + +Or you can run the following command in the terminal: + +.. code-block :: bash + + as_studio + +After that, you can visit AgentScope studio at http://127.0.0.1:5000, and +the following page will be displayed: + +.. image:: https://img.alicdn.com/imgextra/i3/O1CN01Xic0GQ1ZkJ4M0iD8F_!!6000000003232-0-tps-3452-1610.jpg + :align: center + :class: bordered-image + +Of course, you can change the host and port, and link to your application +running histories by providing the following arguments: + +.. code-block:: python + + import agentscope + + agentscope.studio.init( + host="127.0.0.1", # The IP address of AgentScope studio + port=5000, # The port number of AgentScope studio + run_dirs = [ # The directories of your running histories + "xxx/xxx/runs", + "xxx/xxx/runs" + ] + ) + + +Dashboard +----------------- + +Dashboard is a web interface to monitor your running applications and look +through the running histories. + + +Note +^^^^^^^^^^^^^^^^^^^^^ + +Currently, Dashboard has the following limitations, and we are working on +improving it. Any feedback, contribution, or suggestion is welcome! + +* The running application and AgentScope Studio must be running on the same +machine for ``URL/path consistency``. If you want to visit AgentScope from +another machine, you can try to forward the port to the remote machine by +running the following command in the remote machine: + +.. 
code-block :: bash + + # Supposing AgentScope is running on {as_host}:{as_port}, and the port + # of the remote machine is {remote_machine_port} + ssh -L {remote_machine_port}:{as_host}:{as_port} [{user_name}@]{as_host} + +* For distributed applications, the single-machine & multiprocess mode is +supported, but the multi-machine multiprocess mode is not supported yet. + +Register Running Application +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +After starting the AgentScope Studio, you can register your running +applications by specifying ``studio_url`` in ``agentscope.init()``: + +.. code-block:: python + + import agentscope + + agentscope.init( + # ... + project="xxx", + name="xxx", + studio_url="http://127.0.0.1:5000" # The URL of AgentScope Studio + ) + +After registering, you can view the running application in the Dashboard. To +distinguish different applications, you can specify the project and name of +the application. + +.. image:: https://img.alicdn.com/imgextra/i2/O1CN01zcUmuJ1I3OUXy1Q35_!!6000000000837-0-tps-3426-1718.jpg + :align: center + :class: bordered-image + +Click the program with status ``waiting`` to enter the execution +interface. For example, the following picture shows a conversation interface. + +.. image:: https://img.alicdn.com/imgextra/i3/O1CN01sA3VUc1h7OLKVLfr3_!!6000000004230-0-tps-3448-1736.jpg + :align: center + :class: bordered-image + + +.. note:: Once you register the running application, the input operation + within the ``agentscope.agents.UserAgent`` class will be transferred to the + Dashboard in AgentScope Studio, and you can enter the input in the Dashboard. + +Import Running Histories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In AgentScope, the running histories are saved in the ``./runs`` directory by +default. If you want to watch these running histories in the Dashboard, you +can specify the ``run_dirs`` in ``agentscope.studio.init()``: + + +.. 
code-block:: python + + import agentscope + + agentscope.studio.init( + run_dirs = ["xxx/runs"] + ) + +""" diff --git a/docs/tutorial/en/source/tutorial/web_browser.py b/docs/tutorial/en/source/tutorial/web_browser.py new file mode 100644 index 000000000..f5e6e374a --- /dev/null +++ b/docs/tutorial/en/source/tutorial/web_browser.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +""" +.. _web-browser-control: + +Web Browser Control +==================== + +This section is redirected to the +`conversation_with_web_browser_agent/README.md +`_. +""" From 1b0c28ae4397e812ce7da9ca984ceb5710cb2dde Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:37:50 +0800 Subject: [PATCH 02/23] fix error --- .github/workflows/sphinx_docs.yml | 3 +++ docs/tutorial/build.sh | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) delete mode 100755 docs/tutorial/build.sh diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 60fe14ed5..8e22c6468 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -32,6 +32,9 @@ jobs: - name: Install Dependencies run: | pip install -q -e .[full] + - name: Add execute permission to build.sh + run: | + chmod +x docs/tutorial/en/build.sh - id: build name: Build Documentation run: | diff --git a/docs/tutorial/build.sh b/docs/tutorial/build.sh deleted file mode 100755 index 486cfa419..000000000 --- a/docs/tutorial/build.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -sphinx-apidoc -f -o en/source ../../src/agentscope -t template -e -sphinx-apidoc -f -o zh_CN/source ../../src/agentscope -t template -e -make clean all From 7d01a34f2f5c41d96892e5c7e2044c2ba3f2a625 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:41:29 +0800 Subject: [PATCH 03/23] Update dependencies --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 3b090c2f8..b9bee5039 100644 --- a/setup.py +++ b/setup.py @@ -75,6 +75,8 @@ "sphinx_rtd_theme", "myst-parser", 
"sphinxcontrib-mermaid", + "sphinx-gallery", + "sphinx-autobuild", # extra "transformers", ] From 2a69e6469decdd9c6d6a2f895355fdd76e827c64 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:46:49 +0800 Subject: [PATCH 04/23] Add secret key --- .github/workflows/sphinx_docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 8e22c6468..09417f521 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -37,6 +37,8 @@ jobs: chmod +x docs/tutorial/en/build.sh - id: build name: Build Documentation + env: + SECRET_KEY: ${{ secrets.DASHSCOPE_TEST_KEY }} run: | cd docs/tutorial/en/ ./build.sh From 8d4c81708600e6b255ef2977df1b8fd8ad4f266b Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:51:24 +0800 Subject: [PATCH 05/23] Fix error --- .github/workflows/sphinx_docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 09417f521..b6c16cec3 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -38,7 +38,7 @@ jobs: - id: build name: Build Documentation env: - SECRET_KEY: ${{ secrets.DASHSCOPE_TEST_KEY }} + SECRET_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | cd docs/tutorial/en/ ./build.sh From bd08fa4058ac2aac82227ddf198dc1daec2588ff Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:54:27 +0800 Subject: [PATCH 06/23] debug --- .github/workflows/sphinx_docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index b6c16cec3..f702788a4 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -40,6 +40,8 @@ jobs: env: SECRET_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | + echo $DASHSCOPE_API_KEY + echo "Here!" 
cd docs/tutorial/en/ ./build.sh - name: Upload Documentation From 7798b5933c91e57f1ef5e9148ffb2ff650107f7a Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 17:57:04 +0800 Subject: [PATCH 07/23] debug --- .github/workflows/sphinx_docs.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index f702788a4..cd6ab029e 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -38,10 +38,8 @@ jobs: - id: build name: Build Documentation env: - SECRET_KEY: ${{ secrets.DASHSCOPE_API_KEY }} + DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | - echo $DASHSCOPE_API_KEY - echo "Here!" cd docs/tutorial/en/ ./build.sh - name: Upload Documentation From 4f1f27d231d4ab6f141262f2c2b60f1ac764bfdf Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:00:29 +0800 Subject: [PATCH 08/23] debug --- .github/workflows/sphinx_docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index cd6ab029e..1e255406d 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -40,6 +40,7 @@ jobs: env: DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | + echo $DASHSCOPE_API_KEY cd docs/tutorial/en/ ./build.sh - name: Upload Documentation From d19e94e9ad78433f91ca8207935b026f2fd23ebe Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:05:37 +0800 Subject: [PATCH 09/23] debug --- docs/tutorial/en/build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index e8d1bcdb2..27cdd6377 100644 --- a/docs/tutorial/en/build.sh +++ b/docs/tutorial/en/build.sh @@ -1,3 +1,7 @@ #!/bin/bash +echo "Start ..." 
+echo $DASHSCOPE_API_KEY +echo "Done" + sphinx-build -M html source build \ No newline at end of file From 449bedc16d311f9ab685ae19c5b591e11e7ac072 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:08:43 +0800 Subject: [PATCH 10/23] debug --- docs/tutorial/en/build.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index 27cdd6377..5ceb53ae8 100644 --- a/docs/tutorial/en/build.sh +++ b/docs/tutorial/en/build.sh @@ -1,7 +1,12 @@ #!/bin/bash echo "Start ..." -echo $DASHSCOPE_API_KEY +if [ -n "$DASHSCOPE_API_KEY" ]; then + echo "DASHSCOPE_API_KEY is set and not empty" + echo "DASHSCOPE_API_KEY=$DASHSCOPE_API_KEY" +else + echo "DASHSCOPE_API_KEY is not set or is empty" +fi echo "Done" sphinx-build -M html source build \ No newline at end of file From 61305a6b1c4b8b3b5f86488c83b79188e5addf0e Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:14:08 +0800 Subject: [PATCH 11/23] debug --- .github/workflows/sphinx_docs.yml | 12 ++++++------ docs/tutorial/en/build.sh | 7 +------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 1e255406d..3de29911a 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -26,12 +26,12 @@ jobs: uses: actions/setup-python@master with: python-version: ${{ matrix.python-version }} - - name: Update setuptools - run: | - pip install setuptools==68.2.2 wheel==0.41.2 - - name: Install Dependencies - run: | - pip install -q -e .[full] +# - name: Update setuptools +# run: | +# pip install setuptools==68.2.2 wheel==0.41.2 +# - name: Install Dependencies +# run: | +# pip install -q -e .[full] - name: Add execute permission to build.sh run: | chmod +x docs/tutorial/en/build.sh diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index 5ceb53ae8..c144dd541 100644 --- a/docs/tutorial/en/build.sh +++ 
b/docs/tutorial/en/build.sh @@ -1,12 +1,7 @@ #!/bin/bash echo "Start ..." -if [ -n "$DASHSCOPE_API_KEY" ]; then - echo "DASHSCOPE_API_KEY is set and not empty" - echo "DASHSCOPE_API_KEY=$DASHSCOPE_API_KEY" -else - echo "DASHSCOPE_API_KEY is not set or is empty" -fi +python -c "import os; print('!!!' + os.environ['DASHSCOPE_API_KEY'] + '+++')" echo "Done" sphinx-build -M html source build \ No newline at end of file From 3c450b52cddd0cc45d2891fe10163f2c995bb61b Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:18:28 +0800 Subject: [PATCH 12/23] debug --- .github/workflows/sphinx_docs.yml | 7 +------ docs/tutorial/en/build.sh | 6 ++++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 3de29911a..896274d99 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -26,12 +26,7 @@ jobs: uses: actions/setup-python@master with: python-version: ${{ matrix.python-version }} -# - name: Update setuptools -# run: | -# pip install setuptools==68.2.2 wheel==0.41.2 -# - name: Install Dependencies -# run: | -# pip install -q -e .[full] + - name: Add execute permission to build.sh run: | chmod +x docs/tutorial/en/build.sh diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index c144dd541..eee541f64 100644 --- a/docs/tutorial/en/build.sh +++ b/docs/tutorial/en/build.sh @@ -4,4 +4,10 @@ echo "Start ..." python -c "import os; print('!!!' 
+ os.environ['DASHSCOPE_API_KEY'] + '+++')" echo "Done" +if [ -z "$DASHSCOPE_API_KEY" ]; then + echo "DASHSCOPE_API_KEY is not set" > debug.txt +else + echo "DASHSCOPE_API_KEY is set" > debug.txt +fi + sphinx-build -M html source build \ No newline at end of file From cdcfe426654441778784187d8c967de6c06a95cf Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:32:40 +0800 Subject: [PATCH 13/23] debug --- .github/workflows/sphinx_docs.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 896274d99..cdaabe8a2 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -1,10 +1,6 @@ name: Deploy Sphinx documentation to Pages on: - pull_request: - types: [opened, synchronize] - paths: - - 'docs/**/*' push: branches: - main From 1071e51d3b5148fcdc569cce6a64e8417a85db19 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:33:39 +0800 Subject: [PATCH 14/23] debug --- .github/workflows/sphinx_docs.yml | 1 - docs/tutorial/en/build.sh | 4 ---- 2 files changed, 5 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index cdaabe8a2..6dfed4fc7 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -31,7 +31,6 @@ jobs: env: DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | - echo $DASHSCOPE_API_KEY cd docs/tutorial/en/ ./build.sh - name: Upload Documentation diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index eee541f64..e817fa732 100644 --- a/docs/tutorial/en/build.sh +++ b/docs/tutorial/en/build.sh @@ -1,9 +1,5 @@ #!/bin/bash -echo "Start ..." -python -c "import os; print('!!!' 
+ os.environ['DASHSCOPE_API_KEY'] + '+++')" -echo "Done" - if [ -z "$DASHSCOPE_API_KEY" ]; then echo "DASHSCOPE_API_KEY is not set" > debug.txt else From 5f5701a60086df62bcd42b5774a927339544559c Mon Sep 17 00:00:00 2001 From: DavdGao Date: Mon, 6 Jan 2025 18:40:39 +0800 Subject: [PATCH 15/23] debug --- .github/workflows/sphinx_docs.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 6dfed4fc7..e7036e08e 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -22,7 +22,12 @@ jobs: uses: actions/setup-python@master with: python-version: ${{ matrix.python-version }} - + - name: Update setuptools + run: | + pip install setuptools==68.2.2 wheel==0.41.2 + - name: Install Dependencies + run: | + pip install -q -e .[full] - name: Add execute permission to build.sh run: | chmod +x docs/tutorial/en/build.sh From f48f7d5f2acbe08d1da97150dea73532bdada5ce Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 7 Jan 2025 11:19:23 +0800 Subject: [PATCH 16/23] for test --- .github/workflows/sphinx_docs.yml | 27 ++++++++++++++++++++- .github/workflows/unittest.yml | 39 ------------------------------- 2 files changed, 26 insertions(+), 40 deletions(-) delete mode 100644 .github/workflows/unittest.yml diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index e7036e08e..ef74abdef 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -6,7 +6,32 @@ on: - main jobs: - pages: + check-approval-and-run-script: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Check for approval + id: check_approval + run: | + # Fetch reviews for the pull request + response=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/${GITHUB_REPOSITORY}/pulls/${{ 
github.event.pull_request.number }}/reviews) + + # Count the number of APPROVED reviews + approvals=$(echo "$response" | grep '"state": "APPROVED"' | wc -l) + + echo "Approvals: $approvals" + + if [ "$approvals" -lt 1 ]; then + echo "No approvals yet." + exit 1 + fi + + build_tutorial: + needs: check-approval-and-run-script timeout-minutes: 20 runs-on: ${{ matrix.os }} strategy: diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml deleted file mode 100644 index 89f6285d1..000000000 --- a/.github/workflows/unittest.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Python Unittest Coverage - -on: [push, pull_request] - -jobs: - test: - if: false == contains(github.event.pull_request.title, 'WIP') - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-13] - python-version: ['3.9', '3.10', '3.11', '3.12'] - env: - OS: ${{ matrix.os }} - steps: - - uses: actions/checkout@master - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@master - with: - python-version: ${{ matrix.python-version }} - - name: Update setuptools - run: | - pip install setuptools==68.2.2 wheel==0.41.2 - - name: Install Minimal Dependencies - run: | - pip install -q -e . 
- - name: Run minimal import tests - run: | - python tests/minimal.py - - name: Install Full Dependencies - run: | - pip install -q -e .[full] - pip install coverage - - name: Run tests with coverage - run: | - coverage run tests/run.py - - name: Generate coverage report - run: | - coverage report -m From d8d5d2990a530d8073c353f4a20ee8b0ee428610 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 7 Jan 2025 12:33:43 +0800 Subject: [PATCH 17/23] Update the tested version --- .github/workflows/sphinx_docs.yml | 27 +-------------------------- docs/tutorial/en/build.sh | 6 ------ 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index ef74abdef..e95d8403c 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -6,32 +6,7 @@ on: - main jobs: - check-approval-and-run-script: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Check for approval - id: check_approval - run: | - # Fetch reviews for the pull request - response=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/${GITHUB_REPOSITORY}/pulls/${{ github.event.pull_request.number }}/reviews) - - # Count the number of APPROVED reviews - approvals=$(echo "$response" | grep '"state": "APPROVED"' | wc -l) - - echo "Approvals: $approvals" - - if [ "$approvals" -lt 1 ]; then - echo "No approvals yet." 
- exit 1 - fi - build_tutorial: - needs: check-approval-and-run-script timeout-minutes: 20 runs-on: ${{ matrix.os }} strategy: @@ -73,4 +48,4 @@ jobs: with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: 'docs/tutorial/build/html' - cname: doc.agentscope.io \ No newline at end of file + cname: doc.agentscope.io diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index e817fa732..e8d1bcdb2 100644 --- a/docs/tutorial/en/build.sh +++ b/docs/tutorial/en/build.sh @@ -1,9 +1,3 @@ #!/bin/bash -if [ -z "$DASHSCOPE_API_KEY" ]; then - echo "DASHSCOPE_API_KEY is not set" > debug.txt -else - echo "DASHSCOPE_API_KEY is set" > debug.txt -fi - sphinx-build -M html source build \ No newline at end of file From b03126cb7c4d396f05443cac6a6415cb743b2176 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 7 Jan 2025 18:02:53 +0800 Subject: [PATCH 18/23] Finish debugging --- docs/tutorial/en/build.sh | 6 +++++ .../en/source/_templates/module.rst_t | 5 ++++ .../en/source/_templates/package.rst_t | 10 ++++++++ docs/tutorial/en/source/conf.py | 25 ++++++++++++++++++- docs/tutorial/en/source/index.rst | 16 ++++++++++++ 5 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 docs/tutorial/en/source/_templates/module.rst_t create mode 100644 docs/tutorial/en/source/_templates/package.rst_t diff --git a/docs/tutorial/en/build.sh b/docs/tutorial/en/build.sh index e8d1bcdb2..358d1bfa6 100644 --- a/docs/tutorial/en/build.sh +++ b/docs/tutorial/en/build.sh @@ -1,3 +1,9 @@ #!/bin/bash +set -e + +# Generate the API rst files +sphinx-apidoc -o source/build_api ../../../src/agentscope -t source/_templates -e + +# Build the html sphinx-build -M html source build \ No newline at end of file diff --git a/docs/tutorial/en/source/_templates/module.rst_t b/docs/tutorial/en/source/_templates/module.rst_t new file mode 100644 index 000000000..74d73a4c5 --- /dev/null +++ b/docs/tutorial/en/source/_templates/module.rst_t @@ -0,0 +1,5 @@ +{{ basename | heading }} +.. 
automodule:: {{ qualname }} +{%- for option in automodule_options %} + :{{ option }}: +{%- endfor %} \ No newline at end of file diff --git a/docs/tutorial/en/source/_templates/package.rst_t b/docs/tutorial/en/source/_templates/package.rst_t new file mode 100644 index 000000000..0c0f707da --- /dev/null +++ b/docs/tutorial/en/source/_templates/package.rst_t @@ -0,0 +1,10 @@ +{%- macro automodule(modname, options) -%} +.. automodule:: {{ modname }} +{%- for option in options %} + :{{ option }}: +{%- endfor %} +{%- endmacro %} + +{{- pkgname | heading }} + +{{ automodule(pkgname, automodule_options) }} \ No newline at end of file diff --git a/docs/tutorial/en/source/conf.py b/docs/tutorial/en/source/conf.py index face8e930..9345569c0 100644 --- a/docs/tutorial/en/source/conf.py +++ b/docs/tutorial/en/source/conf.py @@ -8,7 +8,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "AgentScope Doc" -copyright = "2024, Alibaba" +copyright = "2025, Alibaba" author = "Alibaba Tongyi Lab" # -- General configuration --------------------------------------------------- @@ -17,6 +17,9 @@ extensions = [ "myst_parser", "sphinx_gallery.gen_gallery", + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", ] myst_enable_extensions = [ @@ -52,3 +55,23 @@ ] source_suffix = [".md", ".rst"] + + +# -- Options for API documentation ------------------------------------------- + +autodoc_member_order = "groupwise" + + +def skip_member(app, what, name, obj, skip, options): + if name in [ + "Operator", + "ServiceFactory", + "", + ]: + return True + + return skip + + +def setup(app): + app.connect("autodoc-skip-member", skip_member) diff --git a/docs/tutorial/en/source/index.rst b/docs/tutorial/en/source/index.rst index 81c1ad696..69a63ce36 100644 --- a/docs/tutorial/en/source/index.rst +++ b/docs/tutorial/en/source/index.rst @@ -46,3 +46,19 @@ Welcome to AgentScope's documentation! :caption: Examples build_tutorial/examples + +.. 
toctree:: + :maxdepth: 1 + :caption: API Docs + + build_api/agentscope + build_api/agentscope.message + build_api/agentscope.models + build_api/agentscope.agents + build_api/agentscope.memory + build_api/agentscope.parsers + build_api/agentscope.rag + build_api/agentscope.service + build_api/agentscope.prompt + build_api/agentscope.tokens + build_api/agentscope.exception From 1378d42b59ce475434692728e1422b6c27ef8f00 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 7 Jan 2025 18:05:35 +0800 Subject: [PATCH 19/23] To activate unittests --- src/agentscope/_init.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agentscope/_init.py b/src/agentscope/_init.py index 42631b227..5aabe2d86 100644 --- a/src/agentscope/_init.py +++ b/src/agentscope/_init.py @@ -66,8 +66,8 @@ def init( search invocation. cache_dir (`str`): The directory to cache files. In Linux/Mac, the dir defaults to - `~/.cache/agentscope`. In Windows, the dir defaults to - `C:\\users\\\\.cache\\agentscope`. + `~/.cache/agentscope`. In Windows, the dir defaults to + `C:\\users\\\\.cache\\agentscope`. use_monitor (`bool`, defaults to `True`): Whether to activate the monitor. 
logger_level (`LOG_LEVEL`, defaults to `"INFO"`): From ca6c9712f73f1edde8153acebe5c01c07f9da0df Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 7 Jan 2025 18:08:17 +0800 Subject: [PATCH 20/23] Recover unittests --- .github/workflows/unittest.yml | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/unittest.yml diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 000000000..72d95ae26 --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,39 @@ +name: Python Unittest Coverage + +on: [push, pull_request] + +jobs: + test: + if: false == contains(github.event.pull_request.title, 'WIP') + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-13] + python-version: ['3.9', '3.10', '3.11', '3.12'] + env: + OS: ${{ matrix.os }} + steps: + - uses: actions/checkout@master + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@master + with: + python-version: ${{ matrix.python-version }} + - name: Update setuptools + run: | + pip install setuptools==68.2.2 wheel==0.41.2 + - name: Install Minimal Dependencies + run: | + pip install -q -e . 
+ - name: Run minimal import tests + run: | + python tests/minimal.py + - name: Install Full Dependencies + run: | + pip install -q -e .[full] + pip install coverage + - name: Run tests with coverage + run: | + coverage run tests/run.py + - name: Generate coverage report + run: | + coverage report -m \ No newline at end of file From 1852c966541c20a35a8daa6fdc23f33efa6af6ee Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 7 Jan 2025 18:49:15 +0800 Subject: [PATCH 21/23] update --- docs/tutorial/en/source/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/tutorial/en/source/conf.py b/docs/tutorial/en/source/conf.py index 9345569c0..eaf3d09bf 100644 --- a/docs/tutorial/en/source/conf.py +++ b/docs/tutorial/en/source/conf.py @@ -60,6 +60,8 @@ # -- Options for API documentation ------------------------------------------- autodoc_member_order = "groupwise" +add_module_names = False +python_display_short_literal_types = True def skip_member(app, what, name, obj, skip, options): From 1dbdf10cf4bca9af5b01a7948a059b5d206b0459 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Wed, 8 Jan 2025 20:46:00 +0800 Subject: [PATCH 22/23] Finish translating into Chinese --- .github/workflows/sphinx_docs.yml | 6 +- docs/tutorial/en/source/tutorial/agent.py | 2 +- docs/tutorial/en/source/tutorial/monitor.py | 4 +- docs/tutorial/zh/Makefile | 20 + docs/tutorial/zh/build.sh | 9 + docs/tutorial/zh/make.bat | 35 ++ .../zh/source/_static/css/gallery.css | 83 +++++ .../zh/source/_templates/module.rst_t | 5 + .../zh/source/_templates/package.rst_t | 10 + docs/tutorial/zh/source/conf.py | 79 ++++ docs/tutorial/zh/source/index.rst | 64 ++++ docs/tutorial/zh/source/tutorial/README.md | 0 docs/tutorial/zh/source/tutorial/agent.py | 177 +++++++++ .../zh/source/tutorial/builtin_agent.py | 222 +++++++++++ .../zh/source/tutorial/conversation.py | 142 +++++++ .../zh/source/tutorial/distribution.py | 222 +++++++++++ docs/tutorial/zh/source/tutorial/examples.py | 178 +++++++++ 
docs/tutorial/zh/source/tutorial/faq.md | 48 +++ docs/tutorial/zh/source/tutorial/low_code.py | 100 +++++ docs/tutorial/zh/source/tutorial/message.py | 58 +++ docs/tutorial/zh/source/tutorial/model.py | 347 ++++++++++++++++++ docs/tutorial/zh/source/tutorial/monitor.py | 76 ++++ .../zh/source/tutorial/multimodality.py | 78 ++++ docs/tutorial/zh/source/tutorial/prompt.py | 133 +++++++ .../zh/source/tutorial/prompt_optimization.py | 102 +++++ .../tutorial/zh/source/tutorial/quickstart.py | 65 ++++ docs/tutorial/zh/source/tutorial/rag.md | 280 ++++++++++++++ docs/tutorial/zh/source/tutorial/streaming.py | 127 +++++++ .../zh/source/tutorial/structured_output.py | 235 ++++++++++++ docs/tutorial/zh/source/tutorial/tool.py | 112 ++++++ docs/tutorial/zh/source/tutorial/visual.py | 202 ++++++++++ .../zh/source/tutorial/web_browser.py | 11 + 32 files changed, 3226 insertions(+), 6 deletions(-) create mode 100644 docs/tutorial/zh/Makefile create mode 100644 docs/tutorial/zh/build.sh create mode 100644 docs/tutorial/zh/make.bat create mode 100644 docs/tutorial/zh/source/_static/css/gallery.css create mode 100644 docs/tutorial/zh/source/_templates/module.rst_t create mode 100644 docs/tutorial/zh/source/_templates/package.rst_t create mode 100644 docs/tutorial/zh/source/conf.py create mode 100644 docs/tutorial/zh/source/index.rst create mode 100644 docs/tutorial/zh/source/tutorial/README.md create mode 100644 docs/tutorial/zh/source/tutorial/agent.py create mode 100644 docs/tutorial/zh/source/tutorial/builtin_agent.py create mode 100644 docs/tutorial/zh/source/tutorial/conversation.py create mode 100644 docs/tutorial/zh/source/tutorial/distribution.py create mode 100644 docs/tutorial/zh/source/tutorial/examples.py create mode 100644 docs/tutorial/zh/source/tutorial/faq.md create mode 100644 docs/tutorial/zh/source/tutorial/low_code.py create mode 100644 docs/tutorial/zh/source/tutorial/message.py create mode 100644 docs/tutorial/zh/source/tutorial/model.py create mode 100644 
docs/tutorial/zh/source/tutorial/monitor.py create mode 100644 docs/tutorial/zh/source/tutorial/multimodality.py create mode 100644 docs/tutorial/zh/source/tutorial/prompt.py create mode 100644 docs/tutorial/zh/source/tutorial/prompt_optimization.py create mode 100644 docs/tutorial/zh/source/tutorial/quickstart.py create mode 100644 docs/tutorial/zh/source/tutorial/rag.md create mode 100644 docs/tutorial/zh/source/tutorial/streaming.py create mode 100644 docs/tutorial/zh/source/tutorial/structured_output.py create mode 100644 docs/tutorial/zh/source/tutorial/tool.py create mode 100644 docs/tutorial/zh/source/tutorial/visual.py create mode 100644 docs/tutorial/zh/source/tutorial/web_browser.py diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index e95d8403c..97f99ced4 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -31,8 +31,8 @@ jobs: - name: Add execute permission to build.sh run: | chmod +x docs/tutorial/en/build.sh - - id: build - name: Build Documentation + - id: build_en + name: Build English Documentation env: DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: | @@ -47,5 +47,5 @@ jobs: if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: 'docs/tutorial/build/html' + publish_dir: 'docs/tutorial/en/build/html' cname: doc.agentscope.io diff --git a/docs/tutorial/en/source/tutorial/agent.py b/docs/tutorial/en/source/tutorial/agent.py index 9be78d529..db0198c4f 100644 --- a/docs/tutorial/en/source/tutorial/agent.py +++ b/docs/tutorial/en/source/tutorial/agent.py @@ -175,5 +175,5 @@ def reply(self, msg): # # Further Reading # --------------------- -# - :ref:`builtin-agent` +# - :ref:`builtin_agent` # - :ref:`model_api` diff --git a/docs/tutorial/en/source/tutorial/monitor.py b/docs/tutorial/en/source/tutorial/monitor.py index 54a1cd9d8..b9a9d6d4c 100644 --- a/docs/tutorial/en/source/tutorial/monitor.py 
+++ b/docs/tutorial/en/source/tutorial/monitor.py @@ -38,7 +38,7 @@ import json -print(json.dumps(agentscope.state_dict(), indent=2)) +print(json.dumps(agentscope.state_dict(), indent=2, ensure_ascii=False)) # %% # Monitoring the Runtime @@ -70,4 +70,4 @@ usage = agentscope.print_llm_usage() -print(json.dumps(usage, indent=2)) +print(json.dumps(usage, indent=2, ensure_ascii=False)) diff --git a/docs/tutorial/zh/Makefile b/docs/tutorial/zh/Makefile new file mode 100644 index 000000000..92dd33a1a --- /dev/null +++ b/docs/tutorial/zh/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/tutorial/zh/build.sh b/docs/tutorial/zh/build.sh new file mode 100644 index 000000000..358d1bfa6 --- /dev/null +++ b/docs/tutorial/zh/build.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -e + +# Generate the API rst files +sphinx-apidoc -o source/build_api ../../../src/agentscope -t source/_templates -e + +# Build the html +sphinx-build -M html source build \ No newline at end of file diff --git a/docs/tutorial/zh/make.bat b/docs/tutorial/zh/make.bat new file mode 100644 index 000000000..dc1312ab0 --- /dev/null +++ b/docs/tutorial/zh/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/tutorial/zh/source/_static/css/gallery.css b/docs/tutorial/zh/source/_static/css/gallery.css new file mode 100644 index 000000000..78892dbff --- /dev/null +++ b/docs/tutorial/zh/source/_static/css/gallery.css @@ -0,0 +1,83 @@ +.sphx-glr-download-link-note.admonition.note { + display: none; +} + +.sphx-glr-download { + display: none; +} + +.bordered-image { + border: 1px solid gray; +} + +:root { + --item-card-width: 200px; + --item-card-margin: 10px; + --item-card-title-height: 50px; + + --item-card-img-length: calc(var(--item-card-width) - 2*var(--item-card-margin)); + --item-card-title-width: calc(var(--item-card-width) - 2*var(--item-card-margin)); + --item-card-title-margin-top: var(--item-card-margin); + + --item-card-height: calc(var(--item-card-margin) * 3 + var(--item-card-img-length) + var(--item-card-title-height)); +} + +.gallery-item { + position: relative; + display: inline-block; + width: var(--item-card-width); + height: var(--item-card-height); + box-shadow: 0 0 8px rgba(0, 0, 0, 0.2); + margin: 7px; +} + +.gallery-item-card { + position: absolute; + top: 0; + left: 0; + width: var(--item-card-width); + height: var(--item-card-height); + display: flex; + flex-direction: column; + margin: var(--item-card-margin); +} + +.gallery-item-card-img { + height: var(--item-card-img-length); + width: var(--item-card-img-length); + min-width: var(--item-card-img-length); + min-height: var(--item-card-img-length); + display: block; +} + +.gallery-item-card-title { + text-align: center; + margin-top: var(--item-card-margin); + font-weight: bold; + min-height: var(--item-card-title-height); + height: var(--item-card-title-height); + width: 
var(--item-card-title-width); + display: flex; + align-items: center; + justify-content: center; +} + +.gallery-item-description { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + background-color: rgba(255, 255, 255, 0.9); + /*background-color: #1e8449;*/ + color: black; + display: none; + justify-content: center; + align-items: flex-start; +} + +.gallery-item:hover .gallery-item-description { + display: flex; + padding: 10px; + border: 1px solid rgba(0, 0, 0, 0.22); +} \ No newline at end of file diff --git a/docs/tutorial/zh/source/_templates/module.rst_t b/docs/tutorial/zh/source/_templates/module.rst_t new file mode 100644 index 000000000..74d73a4c5 --- /dev/null +++ b/docs/tutorial/zh/source/_templates/module.rst_t @@ -0,0 +1,5 @@ +{{ basename | heading }} +.. automodule:: {{ qualname }} +{%- for option in automodule_options %} + :{{ option }}: +{%- endfor %} \ No newline at end of file diff --git a/docs/tutorial/zh/source/_templates/package.rst_t b/docs/tutorial/zh/source/_templates/package.rst_t new file mode 100644 index 000000000..0c0f707da --- /dev/null +++ b/docs/tutorial/zh/source/_templates/package.rst_t @@ -0,0 +1,10 @@ +{%- macro automodule(modname, options) -%} +.. automodule:: {{ modname }} +{%- for option in options %} + :{{ option }}: +{%- endfor %} +{%- endmacro %} + +{{- pkgname | heading }} + +{{ automodule(pkgname, automodule_options) }} \ No newline at end of file diff --git a/docs/tutorial/zh/source/conf.py b/docs/tutorial/zh/source/conf.py new file mode 100644 index 000000000..eaf3d09bf --- /dev/null +++ b/docs/tutorial/zh/source/conf.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "AgentScope Doc" +copyright = "2025, Alibaba" +author = "Alibaba Tongyi Lab" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "myst_parser", + "sphinx_gallery.gen_gallery", + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", +] + +myst_enable_extensions = [ + "colon_fence", +] + +sphinx_gallery_conf = { + "download_all_examples": False, + "examples_dirs": [ + "tutorial", + ], + "gallery_dirs": [ + "build_tutorial", + ], + "filename_pattern": "tutorial/.*\.py", + "example_extensions": [".py"], +} + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +languages = ["en", "zh_CN"] +language = "en" + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] + +html_css_files = [ + "css/gallery.css", +] + +source_suffix = [".md", ".rst"] + + +# -- Options for API documentation ------------------------------------------- + +autodoc_member_order = "groupwise" +add_module_names = False +python_display_short_literal_types = True + + +def skip_member(app, what, name, obj, skip, options): + if name in [ + "Operator", + "ServiceFactory", + "", + ]: + return True + + return skip + + +def setup(app): + app.connect("autodoc-skip-member", skip_member) diff --git a/docs/tutorial/zh/source/index.rst b/docs/tutorial/zh/source/index.rst new file mode 100644 index 000000000..69a63ce36 --- 
/dev/null +++ b/docs/tutorial/zh/source/index.rst @@ -0,0 +1,64 @@ +.. AgentScope Doc documentation master file, created by + sphinx-quickstart on Thu Aug 8 15:07:21 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to AgentScope's documentation! +========================================== + +.. toctree:: + :maxdepth: 1 + :caption: Tutorial + + build_tutorial/quickstart + build_tutorial/message + build_tutorial/agent + build_tutorial/conversation + +.. toctree:: + :maxdepth: 1 + :caption: FQA + + tutorial/faq + +.. toctree:: + :maxdepth: 1 + :caption: Task Guides + + build_tutorial/model + build_tutorial/prompt + build_tutorial/structured_output + build_tutorial/streaming + build_tutorial/builtin_agent + build_tutorial/multimodality + build_tutorial/visual + build_tutorial/monitor + build_tutorial/tool + tutorial/rag + build_tutorial/distribution + build_tutorial/prompt_optimization + build_tutorial/web_browser + build_tutorial/low_code + + +.. toctree:: + :maxdepth: 1 + :caption: Examples + + build_tutorial/examples + +.. toctree:: + :maxdepth: 1 + :caption: API Docs + + build_api/agentscope + build_api/agentscope.message + build_api/agentscope.models + build_api/agentscope.agents + build_api/agentscope.memory + build_api/agentscope.parsers + build_api/agentscope.rag + build_api/agentscope.service + build_api/agentscope.prompt + build_api/agentscope.tokens + build_api/agentscope.exception diff --git a/docs/tutorial/zh/source/tutorial/README.md b/docs/tutorial/zh/source/tutorial/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/tutorial/zh/source/tutorial/agent.py b/docs/tutorial/zh/source/tutorial/agent.py new file mode 100644 index 000000000..611e88fd9 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/agent.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +""" +.. 
_build-agent: + +构建智能体 +==================== + +AgentScope 中,可以通过继承基类`agentscope.agents.AgentBase`来构建智能体 + +在下面,我们将构建一个简单的,可以和其他人互动的智能体。 + +""" + +from agentscope.agents import AgentBase +from agentscope.memory import TemporaryMemory +from agentscope.message import Msg +from agentscope.models import DashScopeChatWrapper +import json + + +# %% +# 定义智能体 +# -------------------------------- +# 继承 `agentscope.agents.AgentBase` 类并实现其构造函数和 `reply` 方法。 +# +# 在构造函数中,我们初始化智能体的名字、系统提示、记忆模块和模型。 +# 在本例中,我们采用 DashScope Chat API 中的 `qwen-max` 作为模型服务。 +# 当然,你可以将其替换为 `agentscope.models` 下的其它模型。 +# +# `reply`方法是智能体的核心,它接受消息作为输入并返回回复消息。 +# 在该方法中,我们实现了智能体的基本逻辑: +# +# - 在记忆中记录输入消息, +# - 使用系统提示和记忆构建提示词, +# - 调用模型获取返回值, +# - 在记忆中记录返回值并返回一个消息。 +# + + +class JarvisAgent(AgentBase): + def __init__(self): + super().__init__("Jarvis") + + self.name = "Jarvis" + self.sys_prompt = "你是一个名为Jarvis的助手。" + self.memory = TemporaryMemory() + self.model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + ) + + def reply(self, msg): + # 在记忆中记录消息 + self.memory.add(msg) + + # 使用系统提示和记忆构建上下文 + prompt = self.model.format( + Msg( + name="system", + content=self.sys_prompt, + role="system", + ), + self.memory.get_memory(), + ) + + # 调用模型获取响应 + response = self.model(prompt) + + # 在记忆中记录响应消息并返回 + msg = Msg( + name=self.name, + content=response.text, + role="assistant", + ) + self.memory.add(msg) + + self.speak(msg) + return msg + + +# %% +# 创建智能体类后,我们实例化它,并通过发送消息与之交互。 +# + +jarvis = JarvisAgent() + +msg = Msg( + name="user", + content="嗨!Jarvis。", + role="user", +) + +msg_reply = jarvis(msg) + +print(f"消息的发送者: {msg_reply.name}") +print(f"发送者的角色: {msg_reply.role}") +print(f"消息的内容: {msg_reply.content}") + + +# %% +# ====================== +# +# 组件 +# ---------- +# 现在我们简要介绍上述智能体中使用到的基本组件,包括 +# +# * 记忆 +# * 模型 +# +# 记忆 +# ^^^^^^^ +# 记忆模块提供了记忆管理的基本操作。 +# + +memory = TemporaryMemory() + +# 添加一条消息 +memory.add(Msg("system", "你是一个名为Jarvis的助手。", "system")) + +# 一次添加多条消息 +memory.add( + [ + Msg("Stank", 
"嗨!", "user"), + Msg("Jarvis", "我能为您做些什么吗?", "assistant"), + ], +) + +print(f"当前记忆: {memory.get_memory()}") +print(f"当前大小: {memory.size()}") + +# %% +# 使用参数 `recent_n` 获取最后两条消息。 +# + +recent_two_msgs = memory.get_memory(recent_n=2) + +for i, msg in enumerate(recent_two_msgs): + print( + f"MSG{i}: 发送者: {msg.name}, 角色: {msg.role}, 内容: {msg.content}", + ) + +# %% +# 删除记忆中的第一条消息。 +# + +memory.delete(0) + +print(f"删除后的记忆: {memory.get_memory()}") +print(f"删除后的大小: {memory.size()}") + +# %% +# 模型 +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# `agentscope.models` 封装了不同的模型 API,并在其 `format` 函数中为不同的 API 提供了基本的提示词构建策略。 +# +# 以 DashScope Chat API 为例: +# + +messages = [ + Msg("system", "你是一个名为Jarvis的助手。", "system"), + Msg("Stank", "嗨!", "user"), + Msg("Jarvis", "我能为您做些什么吗?", "assistant"), +] + +model = DashScopeChatWrapper( + config_name="api", + model_name="qwen-max", +) +prompt = model.format(messages) +print(json.dumps(prompt, indent=4, ensure_ascii=False)) + +# %% +# +# 进一步阅读 +# --------------------- +# - :ref:`builtin-agent` +# - :ref:`model_api` diff --git a/docs/tutorial/zh/source/tutorial/builtin_agent.py b/docs/tutorial/zh/source/tutorial/builtin_agent.py new file mode 100644 index 000000000..2f3ba443a --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/builtin_agent.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +""" +.. _builtin_agent: + +内置智能体 +============================= + +AgentScope 内置若干智能体类,以支持不同使用场景,同时展示如何使用 AgentScope 构建智能体。 + +.. list-table:: + :header-rows: 1 + + * - 类 + - 描述 + * - `UserAgent` + - 允许用户参与对话的智能体。 + * - `DialogAgent` + - 使用自然语言交谈的智能体。 + * - `DictDialogAgent` + - 支持结构化输出的智能体。 + * - `ReActAgent` + - 以 reasoning-acting 循环的方式使用工具的智能体。 + * - `LlamaIndexAgent` + - 检索增强型生成 (RAG) 智能体。 + +""" + +import agentscope + +for module in agentscope.agents.__all__: + if module.endswith("Agent"): + print(module) + +# %% +# .. 
note:: 为了使同一个智能体类能够支持不同的大语言模型 API,所有内置智能体类都通过模型配置名 `model_config_name` 来进行初始化。如果你构建的智能体不打算多个不同的模型,推荐可以显式地进行模型初始化,而不是使用模型配置名。 +# + +import agentscope + +agentscope.init( + model_configs={ + "config_name": "my-qwen-max", + "model_name": "qwen-max", + "model_type": "dashscope_chat", + }, +) + +# %% +# DialogAgent +# ---------------------------- +# DialogAgent 是 AgentScope 中最基本的智能体,可以以对话的方式与用户交互。 +# 开发者可以通过提供不同的系统提示和模型配置来自定义它。 +# + +from agentscope.agents import DialogAgent +from agentscope.message import Msg + +# 初始化一个对话智能体 +alice = DialogAgent( + name="Alice", + model_config_name="my-qwen-max", + sys_prompt="你是一个名叫 Alice 的助手。", +) + +# 向智能体发送一条消息 +msg = Msg("Bob", "嗨!你叫什么名字?", "user") +response = alice(msg) + +# %% +# UserAgent +# ---------------------------- +# `UserAgent` 类允许用户与其他智能体交互。 +# 当调用 `UserAgent` 对象时,它会要求用户输入,并将其格式化为 `Msg` 对象。 +# +# 这里我们展示如何初始化一个 `UserAgent` 对象,并与对话智能体 `alice` 进行交互。 +# + +from agentscope.agents import UserAgent +from io import StringIO +import sys + +user = UserAgent( + name="Bob", + input_hint="用户输入: \n", +) + +# 模拟用户输入 +sys.stdin = StringIO("你认识我吗?\n") + +msg = user() +msg = alice(msg) + +# %% +# DictDialogAgent +# ---------------------------- +# `DictDialogAgent` 支持结构化输出,并可通过 `set_parser` 方法指定解析器来实现自动后处理。 +# +# 我们首先初始化一个 `DictDialogAgent` 对象,然后通过更换解析器,实现不同结构化的输出。 +# + +from agentscope.agents import DictDialogAgent +from agentscope.parsers import MarkdownJsonDictParser + + +charles = DictDialogAgent( + name="Charles", + model_config_name="my-qwen-max", + sys_prompt="你是一个名叫 Charles 的助手。", + max_retries=3, # 获取所需结构化输出失败时的最大重试次数 +) + +# 要求智能体生成包含 `thought`、`speak` 和 `decision` 的结构化输出 +parser1 = MarkdownJsonDictParser( + content_hint={ + "thought": "你的想法", + "speak": "你要说的话", + "decision": "你的最终决定,true/false", + }, + required_keys=["thought", "speak", "decision"], +) + +charles.set_parser(parser1) +msg1 = charles(Msg("Bob", "在下雨天外出是个好主意吗?", "user")) + +print(f"内容字段: {msg1.content}") +print(f"内容字段的类型: {type(msg1.content)}") + +# %% +# 
然后,我们要求智能体从 1 到 10 中选择一个数字。 +# + +parser2 = MarkdownJsonDictParser( + content_hint={ + "thought": "你的想法", + "speak": "你要说的话", + "number": "你选择的数字", + }, +) + +charles.set_parser(parser2) +msg2 = charles(Msg("Bob", "从 1 到 10 中选择一个数字。", "user")) + +print(f"响应消息的内容: {msg2.content}") + +# %% +# 下一个问题是如何对结构化输出进行后处理。 +# 例如,`thought` 字段应该存储在记忆中而不暴露给其他人, +# 而 `speak` 字段应该显示给用户,`decision` 字段应该能够在响应消息对象中轻松访问。 +# + +parser3 = MarkdownJsonDictParser( + content_hint={ + "thought": "你的想法", + "speak": "你要说的话", + "number": "你选择的数字", + }, + required_keys=["thought", "speak", "number"], + keys_to_memory=["thought", "speak", "number"], # 需要存储在记忆中 + keys_to_content="speak", # 需要显示给用户 + keys_to_metadata="number", # 需要存储在响应消息的元数据中 +) + +charles.set_parser(parser3) + +msg3 = charles(Msg("Bob", "从 20 到 30 中选择一个数字。", "user")) + +print(f"内容字段: {msg3.content}") +print(f"内容字段的类型: {type(msg3.content)}\n") + +print(f"元数据字段: {msg3.metadata}") +print(f"元数据字段的类型: {type(msg3.metadata)}") + + +# %% +# .. hint:: 有关结构化输出的高级用法和更多不同解析器,请参阅 :ref:`structured-output` 章节。 +# +# ReActAgent +# ---------------------------- +# `ReActAgent` 以 reasoning-acting 循环的方式使用工具来解决给定的问题。 +# +# 首先我们为智能体准备一个工具函数。 +# + +from agentscope.service import ServiceToolkit, execute_python_code + + +toolkit = ServiceToolkit() + +# 通过指定部分参数将 execute_python_code 设置为工具,这里用户需要在 add 方法里面配置部分 +# 参数,通常是一些应该由开发者提供的参数,例如 API Key 等,剩余参数由智能体自己填写。 +toolkit.add( + execute_python_code, + timeout=300, + use_docker=False, + maximum_memory_bytes=None, +) + +# %% +# 然后我们初始化一个 `ReActAgent` 来解决给定的问题。 +# + +from agentscope.agents import ReActAgent + +david = ReActAgent( + name="David", + model_config_name="my-qwen-max", + sys_prompt="你是一个名叫 David 的助手。", + service_toolkit=toolkit, + max_iters=10, + verbose=True, +) + +task = Msg("Bob", "请帮我计算 151513434*54353453453。", "user") + +response = david(task) + + +# %% +# LlamaIndexAgent +# ---------------------------- +# 有关更多详细信息,请参阅检索增强型生成 (RAG) 章节。 +# diff --git 
a/docs/tutorial/zh/source/tutorial/conversation.py b/docs/tutorial/zh/source/tutorial/conversation.py new file mode 100644 index 000000000..e3b15d245 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/conversation.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +""" +.. _build-conversation: + +构建对话 +====================== + +AgentScope 中,不同智能体之间通过“显式的消息交换”来构建对话。 + +""" + +from agentscope.agents import DialogAgent, UserAgent +from agentscope.message import Msg +from agentscope import msghub +import agentscope + +# 为简单起见,通过模型配置进行初始化 +agentscope.init( + model_configs={ + "config_name": "my-qwen-max", + "model_name": "qwen-max", + "model_type": "dashscope_chat", + }, +) + +# %% +# 两个智能体 +# ----------------------------- +# 这里我们构建一个简单的对话,对话双方是两个智能体 `Angel` 和 `Monster`。 +# + +angel = DialogAgent( + name="Angel", + sys_prompt="你是一个名叫 Angel 的歌手,说话风格简单凝练。", + model_config_name="my-qwen-max", +) + +monster = DialogAgent( + name="Monster", + sys_prompt="你是一个名叫 Monster 的运动员,说话风格简单凝练。", + model_config_name="my-qwen-max", +) + +# %% +# 现在,我们让这两个智能体对话三轮。 +# + +msg = None +for _ in range(3): + msg = angel(msg) + msg = monster(msg) + +# %% +# 如果你想参与到对话中,只需实例化一个内置的 `UserAgent` 来向智能体输入消息。 +# + +user = UserAgent(name="User") + +# %% +# 多于两个智能体 +# --------------------- +# 当一个对话中有多于两个智能体时,来自一个智能体的消息应该广播给所有其他智能体。 +# +# 为了简化广播消息的操作,AgentScope 提供了一个 `msghub` 模块。 +# 具体来说,在同一个 `msghub` 中的智能体会自动接收其它参与者发送的消息。 +# 我们只需要组织发言的顺序,而不需要显式地将消息传递给其它智能体。 +# +# 当然,你也可以显式传递消息,但是记忆模块中的查重操作会自动跳过添加重复的消息。因此不会造成记忆中的消息重复。 +# +# 这里是一个 `msghub` 的示例,我们首先创建三个智能体:Alice、Bob 和 Charlie,它们都使用` qwen-max` 模型。 + +alice = DialogAgent( + name="Alice", + sys_prompt="你是一个名叫Alice的助手。", + model_config_name="my-qwen-max", +) + +bob = DialogAgent( + name="Bob", + sys_prompt="你是一个名叫Bob的助手。", + model_config_name="my-qwen-max", +) + +charlie = DialogAgent( + name="Charlie", + sys_prompt="你是一个名叫Charlie的助手。", + model_config_name="my-qwen-max", +) + +# %% +# 三个智能体将在对话中轮流报数。 +# + +# 介绍对话规则 +greeting = Msg( + name="user", + 
content="现在开始从1开始逐个报数,每次只产生一个数字,绝对不能产生其他任何信息。", + role="user", +) + +with msghub( + participants=[alice, bob, charlie], + announcement=greeting, # 开始时,通知消息将广播给所有参与者。 +) as hub: + # 对话的第一轮 + alice() + bob() + charlie() + + # 我们可以动态管理参与者,例如从对话中删除一个智能体 + hub.delete(charlie) + + # 向所有参与者广播一条消息 + hub.broadcast( + Msg( + "user", + "Charlie已离开对话。", + "user", + ), + ) + + # 对话的第二轮 + alice() + bob() + charlie() + +# %% +# 现在我们打印Alice和Charlie的记忆,以检查操作是否成功。 + +print("Alice的记忆:") +for msg in alice.memory.get_memory(): + print(f"{msg.name}:{msg.content}") + +print("\nCharlie的记忆:") +for msg in charlie.memory.get_memory(): + print(f"{msg.name}:{msg.content}") + +# %% +# 在上面的示例中,Charlie 在第一轮结束后离开了对话,因此没有收到 Alice 和 Bob 的"4"和"5"。 +# 所以第二轮时它报了"4"。 +# 另一方面,Alice 和 Bob 继续了对话,没有 Charlie 的参与。 +# diff --git a/docs/tutorial/zh/source/tutorial/distribution.py b/docs/tutorial/zh/source/tutorial/distribution.py new file mode 100644 index 000000000..cea79e50e --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/distribution.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: sphinx +# format_version: '1.1' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +""" +.. _distribution: + +分布式 +============ + +本节介绍 AgentScope 分布式的使用方法。AgentScope 原生提供了基于 gRPC 的分布式模式, +在这种模式下,一个应用程序中的多个智能体可以部署到不同的进程或者甚至不同的机器上,从而充分利用计算资源,提高效率。 + +基本使用 +~~~~~~~~~~~ + +与传统模式相比,AgentScope 的分布式模式不需要修改主进程代码。只需在初始化智能体时调用 `to_dist` 函数。 + +本节将展示如何使用 AgentScope 的分布式模式进行网页搜索。 +为了展示 AgentScope 分布式模式带来的加速效果,示例中将自定义一个 `WebAgent` 类,该类将等待5秒来模拟抓取网页和从中寻找答案的过程。 + +执行搜索的过程是 `run` 函数。传统模式和分布式模式之间唯一的区别在于初始化阶段,即 `init_without_dist` 和 `init_with_dist`。 +在分布式模式下,您需要调用 `to_dist` 函数,将原始智能体转换为对应的分布式版本。 + +.. 
code-block:: python + + # 请勿在jupyter notebook中运行此代码。 + # 请将代码复制到 `dist_main.py` 文件中,并使用 `python dist_main.py` 命令运行此代码。 + # 在运行此代码之前,请安装分布式版本的 agentscope。 + + import time + import agentscope + from agentscope.agents import AgentBase + from agentscope.message import Msg + + class WebAgent(AgentBase): + + def __init__(self, name): + super().__init__(name) + + def get_answer(self, url: str, query: str): + time.sleep(5) + return f"来自 {self.name} 的答案" + + def reply(self, x: dict = None) -> dict: + return Msg( + name=self.name, + content=self.get_answer(x.content["url"], x.content["query"]) + ) + + + QUERY = "示例查询" + URLS = ["页面_1", "页面_2", "页面_3", "页面_4", "页面_5"] + + def init_without_dist(): + return [WebAgent(f"W{i}") for i in range(len(URLS))] + + + def init_with_dist(): + return [WebAgent(f"W{i}").to_dist() for i in range(len(URLS))] + + + def run(agents): + start = time.time() + results = [] + for i, url in enumerate(URLS): + results.append(agents[i].reply( + Msg( + name="system", + role="system", + content={ + "url": url, + "query": QUERY + } + ) + )) + for result in results: + print(result.content) + end = time.time() + return end - start + + + if __name__ == "__main__": + agentscope.init() + start = time.time() + simple_agents = init_without_dist() + dist_agents = init_with_dist() + end = time.time() + print(f"初始化时间:{end - start}") + print(f"无分布式模式下的运行时间:{run(simple_agents)}") + print(f"分布式模式下的运行时间:{run(dist_agents)}") + + +运行此示例的输出如下: + +.. 
code-block:: text + + 初始化时间:16.50428819656372 + 来自 W0 的答案 + 来自 W1 的答案 + 来自 W2 的答案 + 来自 W3 的答案 + 来自 W4 的答案 + 无分布式模式下的运行时间:25.034368991851807 + 来自 W0 的答案 + 来自 W1 的答案 + 来自 W3 的答案 + 来自 W2 的答案 + 来自 W4 的答案 + 分布式模式下的运行时间:5.0517587661743164 + +从上面的示例输出中,我们可以观察到在采用分布式模式后(25秒->5秒),运行速度显著提高。 + +上面的示例是AgentScope分布式模式最常见的使用场景。当不追求极端性能时,建议直接使用这种方法。 +如果您需要进一步优化性能,则需要对AgentScope分布式模式有更深入的了解。 +下面将介绍AgentScope分布式模式的高级用法。 +""" + +############################################################################### +# 高级用法 +# ~~~~~~~~~~~~~~~ +# +# 本节将介绍 AgentScope 分布式模式的高级使用方法,以进一步提高操作效率。 +# +# 基本概念 +# -------------- +# +# +# 在深入学习高级用法之前,我们必须先了解AgentScope分布式模式的一些基本概念。 +# +# - **主进程**:AgentScope应用程序所在的进程被称为主进程。例如,上一节中的 `run` 函数就是在主进程中运行的。每个 AgentScope 应用程序只有一个主进程。 +# - **智能体服务器进程**:AgentScope智能体服务器进程是智能体在分布式模式下运行的进程。例如,在上一节中,`dist_agents` 中的所有智能体都在智能体服务器进程中运行。可以有多个AgentScope智能体服务器进程。这些进程可以运行在任何可网络访问的机器上,每个智能体服务器进程中可以同时运行多个智能体。 +# - **子进程模式**:在子进程模式下,智能体服务器进程由主进程启动为子进程。例如,在上一节中,`dist_agents` 中的每个智能体实际上都是主进程的一个子进程。这是默认模式,也就是说,如果您直接调用 `to_dist` 函数而不传入任何参数,它将使用此模式。 +# - **独立进程模式**:在独立进程模式下,智能体服务器与主进程无关,需要预先在机器上启动。需要向 `to_dist` 函数传递特定参数,具体用法将在下一节中介绍。 +# +# 使用独立进程模式 +# ---------------------- +# +# 与子进程模式相比,独立进程模式可以避免初始化子进程的开销,从而减少执行开始时的延迟。这可以有效地提高多次调用 `to_dist` 的程序的效率。 +# +# 在独立进程模式下,您需要预先在机器上启动智能体服务器进程,并向 `to_dist` 函数传递特定参数。这里,我们将继续使用基本用法一节中的示例,假设基本用法的代码文件为 `dist_main.py`。然后,创建并单独运行以下脚本。 +# +# .. 
code-block:: python +# +# # 请勿在jupyter notebook中运行此代码。 +# # 将此代码复制到名为 `dist_server.py` 的文件中,并使用命令 `python dist_server.py` 运行。 +# # 在运行此代码之前,请安装分布式版本的 agentscope。 +# # pip install agentscope[distributed] +# +# from dist_main import WebAgent +# import agentscope +# +# if __name__ == "__main__": +# agentscope.init() +# assistant_server_launcher = RpcAgentServerLauncher( +# host="localhost", +# port=12345, +# custom_agent_classes=[WebAgent], +# ) +# assistant_server_launcher.launch(in_subprocess=False) +# assistant_server_launcher.wait_until_terminate() +# +# +# 该脚本在 `dist_server.py` 文件中启动AgentScope智能体服务器进程,该文件位于与基本用法中的 `dist_main.py` 文件相同的目录下。此外,我们还需要对 `dist_main.py` 文件做一些小的修改,添加一个新的 `init_with_dist_independent` 函数,并用这个新函数替换对 `init_with_dist` 的调用。 +# +# .. code-block:: python +# +# def init_with_dist_independent(): +# return [WebAgent(f"W{i}").to_dist(host="localhost", port=12345) for i in range(len(URLS))] +# +# if __name__ == "__main__": +# agentscope.init() +# start = time.time() +# simple_agents = init_without_dist() +# dist_agents = init_with_dist_independent() +# end = time.time() +# print(f"初始化所需时间:{end - start}") +# print(f"无分布式模式下的运行时间:{run(simple_agents)}") +# print(f"分布式模式下的运行时间:{run(dist_agents)}") +# +# +# 完成修改后,打开一个命令提示符并运行 `dist_server.py` 文件。一旦成功启动,再打开另一个命令提示符并运行 `dist_main.py` 文件。 +# +# 此时,`dist_main.py` 的输出中初始化时间将显著减少。例如,这里的初始化时间仅为0.02秒。 +# +# .. code-block:: text +# +# 初始化所需时间:0.018129825592041016 +# ... 
+# +# +# 需要注意的是,上面的示例中使用了 `host="localhost"` 和 `port=12345` ,并且 `dist_main.py` 和 `dist_server.py` 都在同一台机器上运行。在实际使用时,`dist_server.py`可以运行在不同的机器上。此时,`host` 应该设置为运行 `dist_server.py` 的机器的 IP 地址,而 `port` 应该设置为任何可用端口,确保不同机器可以通过网络进行通信。 +# +# 避免重复初始化 +# ------------------------------ +# +# 在上面的代码中,`to_dist` 函数是在已经初始化过的智能体上调用的。`to_dist` 的本质是将原始智能体克隆到智能体服务器进程中,同时在主进程中保留一个 `RpcAgent` 作为原始智能体的代理。对这个 `RpcAgent` 的调用将被转发到智能体服务器进程中对应的智能体。 +# +# 这种做法存在一个潜在问题:原始智能体会被初始化两次——一次在主进程中,一次在智能体服务器进程中,而且这两次初始化是按顺序执行的,无法通过并行来加速。对于初始化成本较低的智能体,直接调用 `to_dist` 函数不会对性能造成显著影响。但是对于初始化成本较高的智能体,避免冗余初始化就很重要。因此,AgentScope分布式模式提供了一种分布式模式初始化的替代方法,允许直接在任何智能体的初始化函数中传递 `to_dist` 参数,如下面修改后的示例所示: +# +# .. code-block:: python +# +# def init_with_dist(): +# return [WebAgent(f"W{i}", to_dist=True) for i in range(len(URLS))] +# +# +# def init_with_dist_independent(): +# return [WebAgent(f"W{i}", to_dist={"host": "localhost", "port": "12345"}) for i in range(len(URLS))] +# +# +# 对于子进程模式,您只需在初始化函数中传递 `to_dist=True` 即可。对于独立进程模式,则需要将原本传递给 `to_dist` 函数的参数以字典形式传递给 `to_dist` 字段。 diff --git a/docs/tutorial/zh/source/tutorial/examples.py b/docs/tutorial/zh/source/tutorial/examples.py new file mode 100644 index 000000000..ef3974b8c --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/examples.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +""" +样例 +======================== + +配置类 +------------------------ + +.. raw:: html + + + + + + + + + + +------------------------ + +智能体 +------------------------ + +.. raw:: html + + + + + + + + + + + + + + + + +------------------------ + +游戏 +------------------------ + +.. raw:: html + + + + + + + + + + + +------------------------ + +对话 +------------------------ + +.. 
raw:: html + + + + + + + + + + + +""" diff --git a/docs/tutorial/zh/source/tutorial/faq.md b/docs/tutorial/zh/source/tutorial/faq.md new file mode 100644 index 000000000..ab5dae86e --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/faq.md @@ -0,0 +1,48 @@ +# 常见问题解答 + +## 关于 AgentScope +_**Q**:AgentScope 与其他代理平台/框架有什么区别?_ +
+**A**:AgentScope 是一个面向开发者的多智能体平台,旨在简化**多智能体应用程序**的开发、部署和监控。 + +## 关于模型 + +_**Q**:如何在 AgentScope 中集成/使用新的模型 API?_ +
+**A**:请参考 [集成新的 LLM API](integrating_new_api) 。 + +_**Q**:AgentScope 支持哪些 LLM?_ +
+**A**:AgentScope 支持大多数现有的 LLM API,包括 OpenAI、Claude、Gemini、DashScope 等。支持列表请参考 [模型 API](model_api) 。 + +_**Q**:如何在 AgentScope 中监控 token 使用情况?_ +
+**A**:详情请参考 [监控 Token 使用情况](token_usage)。 + +## 关于工具 + +_**Q**:AgentScope 提供了哪些工具?_ +
+**A**:请参考 [工具](tools)。 + +_**Q**:如何在 AgentScope 中使用这些工具?_ +
+**A**:AgentScope 提供了 `ServiceToolkit` 模块用于工具使用。详细用法请参考 [工具](tools)。 + +## 关于智能体 + +_**Q**:如何在 AgentScope 中使用智能体?_ +
+**A**:您可以使用 AgentScope 中内置的智能体,或开发自己的智能体。详情请参考 [内置智能体](builtin_agent) 一节。 + +## 关于 GUI + +_**Q**:AgentScope 提供了哪些 GUI?_ +
+**A**:AgentScope 支持在 Gradio 中运行您的应用程序,并且还提供了一个名为 AgentScope Studio 的 GUI,供您监控和管理应用程序。 + +## 关于低代码开发 + +_**Q**:AgentScope 中的低代码开发是什么?_ +
+**A**:它意味着您可以通过拖拽组件来开发应用程序。详情请参考 [低代码开发](low_code)。 diff --git a/docs/tutorial/zh/source/tutorial/low_code.py b/docs/tutorial/zh/source/tutorial/low_code.py new file mode 100644 index 000000000..ab1402f1e --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/low_code.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +""" +.. _low_code: + +低代码开发 +=========================== +本教程介绍如何在AgentScope Workstation中通过拖拽界面构建多智能体应用程序。 + +Workstation +------------------ + +Workstation现已集成在 :ref:`agentscope-studio` 中。 +它为零代码用户提供了一种更简单的方式来构建多智能体应用程序。 + +.. note:: Workstation 正处于快速迭代开发中,将会频繁更新。 + +启动 Workstation +--------------------- + +首先确保您已安装最新版本的 AgentScope。 + +执行以下Python代码来启动 AgentScope Studio: + +.. code-block:: python + + import agentscope + agentscope.studio.init() + +或在终端中运行以下 bash 命令: + +.. code-block:: bash + + as_studio + +然后访问 `https://127.0.0.1:5000` 进入 AgentScope Studio,并点击侧边栏中的 Workstation 图标进入。 + + +* **中央工作区**:您可以在这个主要区域拖拽组件来构建应用程序。 + +* **顶部工具箱**:用于导入、导出、检查和运行您的应用程序。 + +.. image:: https://img.alicdn.com/imgextra/i1/O1CN01RXAVVn1zUtjXVvuqS_!!6000000006718-1-tps-3116-1852.gif + +内置示例 +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +对于初学者,我们强烈建议从预构建的示例开始。您可以直接单击示例将其导入到中央工作区。或者,为了获得更加结构化的学习体验,您也可以选择跟随每个示例链接的教程。这些教程将一步步指导如何在 AgentScope Workstation 上构建多智能体应用。 + +构建应用 +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +要构建应用程序,请执行以下步骤: + +* 选择并拖拽组件:从侧边栏中单击并拖拽所选组件到中央工作区。 + +* 连接节点:大多数节点都有输入和输出点。单击一个组件的输出点并拖拽到另一个组件的输入点,以创建消息流管道。这样不同的节点就可以传递消息。 + +* 配置节点:将节点放入工作区后,单击任意一个节点来填写其配置设置。您可以自定义提示、参数和其他属性。 + +运行应用 +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +构建完应用程序后,单击"运行"按钮。在运行之前,Workstation会检查应用程序中是否有任何错误。如果有错误,系统会提示您在继续之前进行修正。之后,您的应用程序将在与AgentScope Studio相同的Python环境中执行,您可以在Dashboard中找到它。 + +导入/导出应用 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Workstation支持导入和导出您的应用程序。单击"导出HTML"或"导出Python"按钮可生成代码,您可以将其分发给社区或本地保存。如果要将导出的代码转换为Python代码,可以使用以下命令将JSON配置编译为Python代码: + +.. code-block:: bash + + # 编译 + as_workflow config.json --compile ${YOUR_PYTHON_SCRIPT_NAME}.py + +如果您想直接运行本地配置,可以使用以下命令: + +.. 
code-block:: bash + + # 运行 + as_gradio config.json + + +想要进一步编辑您的应用程序吗?只需单击"导入HTML"按钮,将以前导出的HTML代码重新上传到AgentScope Workstation即可。 + +检查应用 +^^^^^^^^^^^^^^^^^^^^^^^^^ + +构建应用程序后,您可以单击"检查"按钮来验证应用程序结构的正确性。将执行以下检查规则: + +* 模型和智能体存在检查:每个应用程序必须至少包含一个模型节点和一个智能体节点。 + +* 单连接策略:每个组件的每个输入不应该有多于一个连接。 + +* 必填字段验证:所有必填输入字段都必须填写,以确保每个节点都有正确运行所需的参数。 + +* 配置命名一致性:智能体节点使用的"模型配置名称"必须与模型节点中定义的"配置名称"相对应。 + +* 适当的节点嵌套:像ReActAgent这样的节点应该只包含工具节点。同样,像IfElsePipeline这样的管道节点应该包含正确数量的元素(不超过2个),而ForLoopPipeline、WhileLoopPipeline和MsgHub应该遵守只有一个元素的规则(必须是SequentialPipeline作为子节点)。 + +""" diff --git a/docs/tutorial/zh/source/tutorial/message.py b/docs/tutorial/zh/source/tutorial/message.py new file mode 100644 index 000000000..a3fffb327 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/message.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +""" +.. _message: + +消息 +==================== + +消息是一种专用的数据结构,用于信息交换。 +在 AgentScope 中,我们使用消息在智能体之间进行通信。 + +消息的最重要字段是:name、role 和 content。 +name 和 role 字段标识消息的发送者,content 字段包含实际信息。 + +.. 
Note:: role 字段必须选择 `"system"`、`"assistant"`、 `"user"` 其中之一。 +""" + +from agentscope.message import Msg +import json + +# %% +# 创建消息 +# ---------------- +# 可以通过指定 name、role 和 content 字段来创建消息。 +# + +msg = Msg( + name="Jarvis", + role="assistant", + content="嗨!我能为您效劳吗?", +) + +print(f'消息发送者:"{msg.name}"') +print(f'发送者角色:"{msg.role}"') +print(f'消息内容:"{msg.content}"') + +# %% +# 序列化 +# ---------------- +# 消息可以序列化为 JSON 格式的字符串。 +# + +serialized_msg = msg.to_dict() + +print(type(serialized_msg)) +print(json.dumps(serialized_msg, indent=4)) + +# %% +# 反序列化 +# ---------------- +# 从 JSON 格式的字典反序列化消息。 +# + +new_msg = Msg.from_dict(serialized_msg) + +print(new_msg) +print(f'消息发送者:"{new_msg.name}"') +print(f'发送者角色:"{new_msg.role}"') +print(f'消息内容:"{new_msg.content}"') diff --git a/docs/tutorial/zh/source/tutorial/model.py b/docs/tutorial/zh/source/tutorial/model.py new file mode 100644 index 000000000..e91b4c626 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/model.py @@ -0,0 +1,347 @@ +# -*- coding: utf-8 -*- +""" +.. _model_api: + +模型 API +==================== + +AgentScope 已集成了许多不同模态的模型 API 。 + +.. note:: 1. 本列表不包括文本到语音(TTS)和语音到文本(STT)API。您可以参考 :ref:`tools` 一节。 + 2. 本节仅介绍如何在AgentScope中使用或集成不同的模型API。关于提示要求和提示工程策略的内容将在 :ref:`prompt-engineering` 一节中介绍。 + + +.. list-table:: + :header-rows: 1 + + * - API + - 对话 + - 文本生成 + - 视觉 + - Embedding + * - OpenAI + - ✓ + - ✗ + - ✓ + - ✓ + * - DashScope + - ✓ + - ✗ + - ✓ + - ✓ + * - Gemini + - ✓ + - ✗ + - ✗ + - ✓ + * - Ollama + - ✓ + - ✓ + - ✓ + - ✓ + * - Yi + - ✓ + - ✗ + - ✗ + - ✗ + * - LiteLLM + - ✓ + - ✗ + - ✗ + - ✗ + * - ZhipuAI + - ✓ + - ✗ + - ✗ + - ✓ + * - Anthropic + - ✓ + - ✗ + - ✗ + - ✗ + +在 AgentScope 中使用模型 API 有两种方式。可以根据自己的需求进行选择: + +- **使用模型配置**:这是构建与模型 API 无关的应用程序的推荐方式。可以通过修改配置来更改模型 API,而无需更改智能体代码。 +- **显式初始化模型**:如果只想使用特定的模型 API,显式初始化模型会更加方便和透明。API 文档字符串提供了参数和用法的详细信息。 + +.. 
tip:: 实际上,使用配置和显式初始化模型是等效的。使用模型配置时,AgentScope 只是将配置中的键值对传递给模型的构造函数。 +""" + +import os + +from agentscope.models import ( + DashScopeChatWrapper, + ModelWrapperBase, + ModelResponse, +) +import agentscope + +# %% +# 使用配置 +# ------------------------------ +# 在模型配置中,需要提供以下三个字段: +# +# - config_name:配置的名称。 +# - model_type:模型 API 的类型,例如 "dashscope_chat"、"openai_chat" 等。 +# - model_name:模型的名称,例如 "qwen-max"、"gpt-4o" 等。 +# +# 在使用模型 API 之前通过调用 `agentscope.init()` 来加载配置,如下所示: +# + +agentscope.init( + model_configs=[ + { + "config_name": "gpt-4o_temperature-0.5", + "model_type": "openai_chat", + "model_name": "gpt-4o", + "api_key": "xxx", + "temperature": 0.5, + }, + { + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max", + }, + ], +) + +# %% +# 对于其它可选参数,可以查看对应模型 API 的构造函数的说明。 + +# %% +# 显式初始化模型 +# -------------------------------- +# `agentscope.models` 模块提供了所有的内置模型 API。 +# 您可以通过调用相应的模型类来显式初始化模型。 +# + +# 打印 agentscope.models 下的模块 +for module_name in agentscope.models.__all__: + if module_name.endswith("Wrapper"): + print(module_name) + +# %% +# 以 DashScope Chat API 为例: +# + +model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + api_key=os.environ["DASHSCOPE_API_KEY"], + stream=False, +) + +response = model( + messages=[ + {"role": "user", "content": "嗨!"}, + ], +) + +# %% +# `response` 是 `agentscope.models.ModelResponse` 的一个对象,它包含以下字段: +# +# - text:生成的文本 +# - embedding:生成的嵌入 +# - image_urls:引用生成的图像 +# - raw:来自 API 的原始响应 +# - parsed:解析后的响应,例如将字符串解析成 JSON 对象 +# - stream:用来挂载流式响应的生成器,更多详情请参考 :ref:`streaming` 一节。 + +print(f"文本:{response.text}") +print(f"嵌入:{response.embedding}") +print(f"图像URL:{response.image_urls}") +print(f"原始响应:{response.raw}") +print(f"解析后响应:{response.parsed}") +print(f"流响应:{response.stream}") + +# %% +# .. 
_integrating_new_api: +# +# 集成新的 LLM API +# ---------------------------- +# 将新的 LLM API 集成到 AgentScope 有两种方式。 +# +# OpenAI 兼容 API +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# 如果您的模型与 OpenAI Python API 兼容,建议重用 `OpenAIChatWrapper` 类并提供特定参数。 +# +# .. note:: 需要确保 API 的回复同样兼容 OpenAI Python API。 +# +# 以 vLLM 为例, +# 其 `官方文档 `_ 提供了以下使用 OpenAI Python 库的示例: +# +# .. code-block:: python +# +# from openai import OpenAI +# client = OpenAI( +# base_url="http://localhost:8000/v1", +# api_key="token-abc123", +# ) +# +# completion = client.chat.completions.create( +# model="NousResearch/Meta-Llama-3-8B-Instruct", +# messages=[ +# {"role": "user", "content": "Hello!"} +# ], +# temperature=0.5, +# ) +# +# print(completion.choices[0].message) +# +# +# 将 vLLM 集成到 AgentScope 非常简单,如下: +# +# - 将初始化 OpenAI 客户端的参数(除了 `api_key`)放入 `client_args`, +# - 将生成完成的参数(除了 `model`)放入 `generate_args`。 +# + +vllm_model_config = { + "model_type": "openai_chat", + "config_name": "vllm_llama2-7b-chat-hf", + "model_name": "meta-llama/Llama-2-7b-chat-hf", + "api_key": "token-abc123", # API 密钥 + "client_args": { + "base_url": "http://localhost:8000/v1/", # 用于指定 API 的基础 URL + }, + "generate_args": { + "temperature": 0.5, # 生成参数,如 temperature、seed + }, +} + +# %% +# 或者,直接用参数初始化 OpenAI Chat API 的模型类: +# + +from agentscope.models import OpenAIChatWrapper + +model = OpenAIChatWrapper( + config_name="", + model_name="meta-llama/Llama-2-7b-chat-hf", + api_key="token-abc123", + client_args={"base_url": "http://localhost:8000/v1/"}, + generate_args={"temperature": 0.5}, +) + +# %% +# RESTful API +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# 如果您的模型通过 RESTful post API 访问,并且响应格式与 OpenAI API 兼容,可以考虑使用 `PostAPIChatWrapper`。 +# +# 以下面的 curl 命令为例,只需将 header、API URL 和 data(除了 `messages`,它将自动传递)提取成模型类的初始化参数即可。 +# +# 一个示例 post 请求: +# +# .. 
code-block:: bash +# +# curl https://api.openai.com/v1/chat/completions +# -H "Content-Type: application/json" +# -H "Authorization: Bearer $OPENAI_API_KEY" +# -d '{ +# "model": "gpt-4o", +# "messages": [ +# {"role": "user", "content": "write a haiku about ai"} +# ] +# }' +# +# 相应的模型类初始化如下: +# + +from agentscope.models import PostAPIChatWrapper + +post_api_model = PostAPIChatWrapper( + config_name="", + api_url="https://api.openai.com/v1/chat/completions", # 目标 URL + headers={ + "Content-Type": "application/json", # 来自头部 + "Authorization": "Bearer $OPENAI_API_KEY", + }, + json_args={ + "model": "gpt-4o", # 来自数据 + }, +) + +# %% +# 它的模型配置如下: +# + +post_api_config = { + "config_name": "{my_post_model_config_name}", + "model_type": "post_api_chat", + "api_url": "https://api.openai.com/v1/chat/completions", + "headers": { + "Authorization": "Bearer {YOUR_API_TOKEN}", + }, + "json_args": { + "model": "gpt-4o", + }, +} + +# %% +# 如果你的模型 API 返回格式与 OpenAI 不同,可以继承 `PostAPIChatWrapper` 并重写 `_parse_response` 方法。 +# +# .. 
note:: 需要在子类中定义一个新的 `model_type` 字段,以区分它与现有的模型类。 +# +# + + +class MyNewModelWrapper(PostAPIChatWrapper): + model_type: str = "{my_new_model_type}" + + def _parse_response(self, response: dict) -> ModelResponse: + """解析来自 API 服务器的响应。 + + Args: + response (`dict`): + 从 API 服务器获取的响应,并通过 + `response.json()` 解析为统一的格式。 + + Returns (`ModelResponse`): + 解析后的响应。 + """ + # TODO: 将以下代码替换为您自己的解析逻辑 + return ModelResponse( + text=response["data"]["response"]["choices"][0]["message"][ + "content" + ], + ) + + +# %% +# 自定义模型类 +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# 如果需要从头开始实现新的模型类,首先需要了解 AgentScope 中的以下概念: +# +# - **model_type**:在使用模型配置时,AgentScope 使用 `model_type` 字段来区分不同的模型 API。因此,请确保您的新模型类具有唯一的 `model_type`。 +# - **__init__**:从模型配置初始化时,AgentScope 会将配置中的所有键值对传递给模型类的 `__init__` 方法。因此,请确保您的 `__init__` 方法可以处理配置中的所有参数。 +# - **__call__**:模型类的核心方法是 `__call__`,它接收输入消息并返回响应。其返回值应该是 `ModelResponse` 对象。 +# + + +class MyNewModelWrapper(ModelWrapperBase): + model_type: str = "{my_new_model_type}" + + def __init__(self, config_name, model_name, **kwargs) -> None: + super().__init__(config_name, model_name=model_name) + + # TODO: 在这里初始化您的模型 + + def __call__(self, *args, **kwargs) -> ModelResponse: + # TODO: 在这里实现您的模型的核心逻辑 + + return ModelResponse( + text="Hello, World!", + ) + + +# %% +# .. tip:: 可选地,可以在模型中实现一个 `format` 方法,从而将 AgentScope 中的 `Msg` 类 转化为目标模型 API 要求的格式。更多详情请参考 :ref:`prompt-engineering`。 +# +# 进一步阅读 +# --------------------- +# - :ref:`prompt-engineering` +# - :ref:`streaming` +# - :ref:`structured-output` diff --git a/docs/tutorial/zh/source/tutorial/monitor.py b/docs/tutorial/zh/source/tutorial/monitor.py new file mode 100644 index 000000000..0f7941687 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/monitor.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +""" +.. 
_configuring_and_monitoring: + +配置和监控 +================================== + +AgentScope 的主入口是 `agentscope.init`,在这里您可以配置应用程序。 +""" + +import agentscope + + +agentscope.init( + model_configs=[ # 模型配置 + { + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max", + }, + ], + project="项目 Alpha", # 项目名称 + name="测试-1", # 运行时名称 + disable_saving=False, # 是否禁用文件保存,推荐开启 + save_dir="./runs", # 保存目录 + save_log=True, # 是否保存日志 + save_code=False, # 是否保存此次运行的代码 + save_api_invoke=False, # 保存 API 调用 + cache_dir="~/.cache", # 缓存目录,用于缓存 Embedding 和其它 + use_monitor=True, # 是否监控 token 使用情况 + logger_level="INFO", # 日志级别 +) + +# %% +# 导出配置 +# -------------------------------- +# `state_dict` 方法可用于导出正在运行的应用程序的配置。 +# + +import json + +print(json.dumps(agentscope.state_dict(), indent=2)) + +# %% +# 运行监控 +# -------------------------- +# AgentScope 提供了 AgentScope Studio,这是一个 Web 可视化界面,用于监控和管理正在运行的应用程序和历史记录。 +# 有关更多详细信息,请参阅 :ref:`visual` 部分。 +# + +# %% +# .. _token_usage: +# +# 监控 Token 使用情况 +# ------------------------ +# `print_llm_usage` 将打印并返回当前运行应用程序的 token 使用情况。 +# + +from agentscope.models import DashScopeChatWrapper + +qwen_max = DashScopeChatWrapper( + config_name="-", + model_name="qwen-max", +) +qwen_plus = DashScopeChatWrapper( + config_name="-", + model_name="qwen-plus", +) + +# 调用 qwen-max 和 qwen-plus 来模拟 token 使用情况 +_ = qwen_max([{"role": "user", "content": "Hi!"}]) +_ = qwen_plus([{"role": "user", "content": "Who are you?"}]) + +usage = agentscope.print_llm_usage() + +print(json.dumps(usage, indent=2)) diff --git a/docs/tutorial/zh/source/tutorial/multimodality.py b/docs/tutorial/zh/source/tutorial/multimodality.py new file mode 100644 index 000000000..e11a6f2fe --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/multimodality.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +""" +.. 
_multimodality: + +多模态 +============================ + +在本节中,我们将展示如何在 AgentScope 中构建多模态应用程序。 + +构建视觉智能体 +------------------------------ + +对于大多数大语言模型 API,视觉和非视觉模型共享相同的 API,只是输入格式有所不同。 +在 AgentScope 中,模型包装器的 `format` 函数负责将输入的 `Msg` 对象转换为视觉模型所需的格式。 + +也就是说,我们只需指定视觉大语言模型而无需更改智能体的代码。 +有关 AgentScope 支持的视觉大语言模型 API,请参阅 :ref:`model_api` 部分。 + +以 "qwen-vl-max" 为例,我们将使用视觉大语言模型构建一个智能体。 +""" + +model_config = { + "config_name": "my-qwen-vl", + "model_type": "dashscope_multimodal", + "model_name": "qwen-vl-max", +} + +# %% +# +# 如往常一样,我们使用上述配置初始化 AgentScope,并使用视觉大语言模型创建一个新的智能体。 +# + +from agentscope.agents import DialogAgent +import agentscope + +agentscope.init(model_configs=model_config) + +agent = DialogAgent( + name="Monday", + sys_prompt="你是一个名为Monday的助手。", + model_config_name="my-qwen-vl", +) + +# %% +# 为了与智能体进行多模态数据的交互,`Msg` 类提供了一个 `url` 字段。 +# 你可以在 `url` 字段中放置本地或在线的图片 URL。 +# +# 这里让我们首先使用 matplotlib 创建一个图片 +# + +import matplotlib.pyplot as plt + +plt.figure(figsize=(6, 6)) +plt.bar(range(3), [2, 1, 4]) +plt.xticks(range(3), ["Alice", "Bob", "Charlie"]) +plt.title("The Apples Each Person Has in 2023") +plt.xlabel("Number of Apples") + +plt.show() +plt.savefig("./bar.png") + +# %% +# 然后,我们创建一个包含图像 URL 的 `Msg` 对象 +# + +from agentscope.message import Msg + +msg = Msg( + name="用户", + content="为我详细描述一下这个图片。", + role="user", + url="./bar.png", +) + +# %% +# 之后,我们可以将消息发送给视觉智能体并获取响应。 + +response = agent(msg) diff --git a/docs/tutorial/zh/source/tutorial/prompt.py b/docs/tutorial/zh/source/tutorial/prompt.py new file mode 100644 index 000000000..e862d288c --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/prompt.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +""" +.. _prompt-engineering: + +提示工程 +================================ + +提示工程是构建大语言模型应用的关键步骤,尤其是针对多智能体的应用。 +然而,目前市面上大多数 API 服务只专注于 Chat 场景,即对话只有两个参与者:用户(user)和 +助手(assistant),并且两者必须交替发送消息。 + +为了支持多智能体应用,AgentScope 构建了不同的提示策略,从而将一组 `Msg` 对象转换为模型 +API 需要的格式。 + +.. 
note:: 目前还没有一种提示工程可以做到一劳永逸。AgentScope 内置提示构建策略的目标 + 是让初学者可以顺利调用模型 API,而不是达到最佳性能。 + 对于高级用户,我们建议开发人员根据需求和模型 API 要求来自定义提示构建策略。 + +提示构建策略 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +内置提示策略在模型类的 `format` 方法中实现。以 DashScope Chat API 为例: + +""" + +from agentscope.models import DashScopeChatWrapper +from agentscope.message import Msg +import json + + +model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", +) + +# 可以将 `Msg` 对象或 `Msg` 对象列表传递给 `format` 方法 +prompt = model.format( + Msg("system", "You're a helpful assistant.", "system"), + [ + Msg("assistant", "Hi!", "assistant"), + Msg("user", "Nice to meet you!", "user"), + ], +) + +print(json.dumps(prompt, indent=4, ensure_ascii=False)) + +# %% +# 格式化输入消息后,我们可以将其传给 `model` 对象,进行实际的 API 调用。 +# + +response = model(prompt) + +print(response.text) + +# %% +# 非视觉模型 +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# 在下表中,我们列出了内置的提示策略,以及支持的大语言模型的前缀。 +# +# 以下面的消息为例: +# +# .. code-block:: python +# +# Msg("system", "You're a helpful assistant named Alice.", "system"), +# Msg("Alice", "Hi!", "assistant"), +# Msg("Bob", "Nice to meet you!", "user") +# +# +# .. list-table:: +# :header-rows: 1 +# +# * - LLMs +# - `model_name` +# - Constructed Prompt +# * - OpenAI LLMs +# - `gpt-` +# - .. code-block:: python +# +# [ +# { +# "role": "system", +# "name": "system", +# "content": "You're a helpful assistant named Alice." +# }, +# { +# "role": "user", +# "name": "Alice", +# "content": "Hi!" +# }, +# { +# "role": "user", +# "name": "Bob", +# "content": "Nice to meet you!" +# } +# ] +# * - Gemini LLMs +# - `gemini-` +# - .. code-block:: python +# +# [ +# { +# "role": "user", +# "parts": [ +# "You're a helpful assistant named Alice.\\n## Conversation History\\nAlice: Hi!\\nBob: Nice to meet you!" +# ] +# } +# ] +# * - All other LLMs +# +# (e.g. DashScope, ZhipuAI ...) +# - +# - .. code-block:: python +# +# [ +# { +# "role": "system", +# "content": "You're a helpful assistant named Alice." 
+# }, +# { +# "role": "user", +# "content": "## Conversation History\\nAlice: Hi!\\nBob: Nice to meet you!" +# } +# ] +# +# .. tip:: 考虑到一些 API 兼容不同的大语言模型(例如 OpenAI Python 库),AgentScope 使用 `model_name` 字段来区分不同的模型并决定最终使用的策略。 +# +# 视觉模型 +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# 对于视觉模型,AgentScope 目前支持 OpenAI 视觉模型和 Dashscope 多模态 API。 +# 未来将加入对更多视觉模型的支持。 +# diff --git a/docs/tutorial/zh/source/tutorial/prompt_optimization.py b/docs/tutorial/zh/source/tutorial/prompt_optimization.py new file mode 100644 index 000000000..30f157d73 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/prompt_optimization.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +""" +.. _system-prompt-optimization: + +系统提示优化 +============================ + +AgentScope 实现了一个用于优化智能体系统提示的模块。 + +.. _system-prompt-generator: + +系统提示生成器 +^^^^^^^^^^^^^^^^^^^^^^^^ + +系统提示生成器使用元提示(Meta prompt)来指导模型根据用户的要求生成系统提示,并允许开发人员使用内置示例或提供自己的示例作为上下文学习(ICL)。 + +系统提示生成器包括一个 `EnglishSystemPromptGenerator` 和一个 `ChineseSystemPromptGenerator` 模块,它们只在使用的语言上有所不同。 + +我们以 `ChineseSystemPromptGenerator` 为例,说明如何使用系统提示生成器。 + +初始化 +^^^^^^^^^^^^^^^^^^^^^^^^ + +要初始化生成器,你需要首先在 `agentscope.init` 函数中注册你的模型配置。 +""" + +from agentscope.prompt import ChineseSystemPromptGenerator +import agentscope + +model_config = { + "model_type": "dashscope_chat", + "config_name": "qwen_config", + "model_name": "qwen-max", + # 通过环境变量导出你的 api 密钥 +} + +# %% +# 生成器将使用内置的默认元提示来指导大语言模型生成系统提示。 +# + +agentscope.init( + model_configs=model_config, +) + +prompt_generator = ChineseSystemPromptGenerator( + model_config_name="qwen_config", +) + + +# %% +# 我们欢迎用户自由尝试不同的优化方法。我们提供了相应的 `SystemPromptGeneratorBase` 模块,可以通过继承来实现自定义的系统提示生成器。 +# +# 生成系统提示 +# ^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# 调用生成器的 `generate` 函数来生成系统提示,如下所示。 +# +# 可以输入一个需求,或者要优化的系统提示。 + +generated_system_prompt = prompt_generator.generate( + user_input="为一位小红书营销专家生成系统提示,他负责推广书籍。", +) + +print(generated_system_prompt) + +# %% +# 上下文学习(ICL) +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# AgentScope 
在系统提示生成中支持上下文学习。 +# +# 要使用示例,AgentScope 提供了以下参数: +# +# - `example_num`: 附加到元提示的示例数量,默认为 0 +# - `example_selection_strategy`: 选择示例的策略,可选 "random" 和 "similarity"。 +# - `example_list`: 一个示例列表,其中每个示例必须是一个带有键 "user_prompt" 和 "opt_prompt" 的字典。如果未指定,将使用内置的示例列表。 +# +# 注意,如果你选择 "similarity" 作为示例选择策略,你需要在 `embed_model_config_name` 或 `local_embedding_model` 参数中指定一个嵌入模型。 +# +# 它们的区别如下: +# +# - `embed_model_config_name`: 你必须先在 `agentscope.init` 中注册嵌入模型,并在此参数中指定模型配置名称。 +# - `local_embedding_model`: 或者,你可以使用 `sentence_transformers.SentenceTransformer` 库支持的本地小型嵌入模型。 +# +# 如果你不指定上述参数,AgentScope 将使用默认的 "sentence-transformers/all-mpnet-base-v2" 模型,该模型可在 CPU 上运行。 + +icl_generator = ChineseSystemPromptGenerator( + model_config_name="qwen_config", + example_num=3, + example_selection_strategy="random", +) + +icl_generated_system_prompt = icl_generator.generate( + user_input="为一位小红书营销专家生成系统提示,他负责推广书籍。", +) + +print(icl_generated_system_prompt) + +# %% +# .. note:: 1. 样例的 Embedding 将被缓存在 `~/.cache/agentscope/` 中,以避免重复计算。 +# 2. `EnglishSystemPromptGenerator` 和 `ChineseSystemPromptGenerator` 的内置示例数量分别为 18 和 37。请注意 Embedding API 服务的成本。 +# diff --git a/docs/tutorial/zh/source/tutorial/quickstart.py b/docs/tutorial/zh/source/tutorial/quickstart.py new file mode 100644 index 000000000..f1d263e50 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/quickstart.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +""" +.. _quickstart: + +快速入门 +============================ + +AgentScope 需要 Python 3.9 或更高版本。你可以从源码或 pypi 安装。 + +从 PyPI 安装 +---------------- +.. code-block:: bash + + pip install agentscope + +从源码安装 +---------------- +要从源码安装 AgentScope,你需要从 GitHub 克隆仓库,然后通过以下命令安装 + +.. code-block:: bash + + git clone https://github.com/modelscope/agentscope + cd agentscope + pip install -e . 
+ +要确保 AgentScope 安装正常。可以执行以下代码: +""" + +import agentscope + +print(agentscope.__version__) + +# %% +# 额外依赖 +# ---------------------------- +# +# AgentScope 提供了针对不同需求的额外依赖。你可以根据需求安装它们。 +# +# - ollama: Ollama API +# - litellm: Litellm API +# - zhipuai: Zhipuai API +# - gemini: Gemini API +# - anthropic: Anthropic API +# - service: 用于不同工具函数的依赖 +# - distribute: 用于分布式模式的依赖 +# - full: 一次性安装所有依赖 +# +# 以分布式模式为例,安装命令因操作系统而异。 +# +# 对于 Windows 用户: +# +# .. code-block:: bash +# +# pip install agentscope[gemini] +# # 或 +# pip install agentscope[ollama,distribute] +# +# 对于 Mac 和 Linux 用户: +# +# .. code-block:: bash +# +# pip install agentscope\[gemini\] +# # 或 +# pip install agentscope\[ollama,distribute\] +# diff --git a/docs/tutorial/zh/source/tutorial/rag.md b/docs/tutorial/zh/source/tutorial/rag.md new file mode 100644 index 000000000..8199f5361 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/rag.md @@ -0,0 +1,280 @@ +# RAG + +我们在此介绍AgentScope与RAG相关的三个概念:知识(Knowledge),知识库(Knowledge Bank)和RAG 智能体。 + +### Knowledge +知识模块(目前仅有“LlamaIndexKnowledge”;即将提供对LangChain的支持)负责处理所有与RAG相关的操作。 + +#### 如何初始化一个Knowledge对象 + 用户可以使用JSON配置来创建一个Knowledge模块,以指定1)数据路径,2)数据加载器,3)数据预处理方法,以及4)嵌入模型(模型配置名称)。 +一个详细的示例可以参考以下内容: +
+ 详细的配置示例 + + ```json + [ + { + "knowledge_id": "{your_knowledge_id}", + "emb_model_config_name": "{your_embed_model_config_name}", + "data_processing": [ + { + "load_data": { + "loader": { + "create_object": true, + "module": "llama_index.core", + "class": "SimpleDirectoryReader", + "init_args": { + "input_dir": "{path_to_your_data_dir_1}", + "required_exts": [".md"] + } + } + } + }, + { + "load_data": { + "loader": { + "create_object": true, + "module": "llama_index.core", + "class": "SimpleDirectoryReader", + "init_args": { + "input_dir": "{path_to_your_python_code_data_dir}", + "recursive": true, + "required_exts": [".py"] + } + } + }, + "store_and_index": { + "transformations": [ + { + "create_object": true, + "module": "llama_index.core.node_parser", + "class": "CodeSplitter", + "init_args": { + "language": "python", + "chunk_lines": 100 + } + } + ] + } + } + ] + } + ] + ``` + +
+ +#### 更多关于 knowledge 配置 +以上提到的配置通常保存为一个JSON文件,它必须包含以下关键属性 +* `knowledge_id`: 每个knowledge模块的唯一标识符; +* `emb_model_config_name`: embedding模型的名称; +* `chunk_size`: 对文件分块的默认大小; +* `chunk_overlap`: 文件分块之间的默认重叠大小; +* `data_processing`: 一个list型的数据处理方法集合。 + +##### 以配置 LlamaIndexKnowledge 为例 + +当使用`llama_index_knowledge`时,对于上述的最后一项`data_processing` ,这个`list`型的参数中的每个条目(为`dict`型)都对应配置一个data loader对象,其功能包括用来加载所需的数据(即字段`load_data`中包含的信息),以及处理加载数据的转换对象(`store_and_index`)。换而言之,在一次载入数据时,可以同时从多个数据源中加载数据,并处理后合并在同一个索引下以供后面的数据提取使用(retrieve)。有关该组件的更多信息,请参阅 [LlamaIndex-Loading](https://docs.llamaindex.ai/en/stable/module_guides/loading/)。 + +在这里,无论是针对数据加载还是数据处理,我们都需要配置以下属性 +* `create_object`:指示是否创建新对象,在此情况下必须为true; +* `module`:对象对应的类所在的位置; +* `class`:这个类的名称。 + +更具体地说,当对`load_data`进行配置时,您可以选择使用多种多样的加载器,例如使用`SimpleDirectoryReader`(在`class`字段里配置)来读取各种类型的数据(例如txt、pdf、html、py、md等)。关于这个数据加载器,您还需要配置以下关键属性 +* `input_dir`:数据加载的路径; +* `required_exts`:将加载的数据的文件扩展名。 + +有关数据加载器的更多信息,请参阅[这里](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/)。 + +对于`store_and_index`而言,这个配置是可选的,如果用户未指定特定的转换方式,系统将使用默认的transformation(也称为node parser)方法,名称为`SentenceSplitter`。在某些特定需求下也可以使用不同的转换方式,例如对于代码解析可以使用`CodeSplitter`,针对这种特殊的node parser,用户可以设置以下属性: +* `language`:希望处理代码的语言名; +* `chunk_lines`:分割后每个代码块的行数。 + +有关节点解析器的更多信息,请参阅[这里](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/)。 + +如果用户想要避免详细的配置,我们也在`KnowledgeBank`中提供了一种快速的方式(请参阅以下内容)。 + +#### 如何使用一个 Knowledge 对象 +当我们成功创建了一个knowledge后,用户可以通过`.retrieve`从`Knowledge` 对象中提取信息。`.retrieve`函数接受以下三个参数: +* `query`: 输入参数,用户希望提取与之相关的内容; +* `similarity_top_k`: 提取的“数据块”数量; +* `to_list_strs`: 是否只返回字符串(str)的列表(list)。 + +*高阶:* 对于 `LlamaIndexKnowledge`, 它的`.retrieve`函数也支持熟悉LlamaIndex的用户直接传入一个建好的retriever。 + +#### 关于`LlamaIndexKnowledge`的细节 +在这里,我们将使用`LlamaIndexKnowledge`作为示例,以说明在`Knowledge`模块内的操作。 +当初始化`LlamaIndexKnowledge`对象时,`LlamaIndexKnowledge.__init__`将执行以下步骤: + * 它处理数据并生成检索索引 (`LlamaIndexKnowledge._data_to_index(...)`中完成) 
其中包括 + * 加载数据 `LlamaIndexKnowledge._data_to_docs(...)`; + * 对数据进行预处理,使用预处理方法(比如分割)和向量模型生成向量 `LlamaIndexKnowledge._docs_to_nodes(...)`; + * 基于生成的向量做好被查询的准备, 即生成索引。 + * 如果索引已经存在,则会调用 `LlamaIndexKnowledge._load_index(...)` 来加载索引,并避免重复的嵌入调用。 +
+ +### Knowledge Bank +知识库将一组Knowledge模块(例如,来自不同数据集的知识)作为知识的集合进行维护。因此,不同的智能体可以在没有不必要的重新初始化的情况下重复使用知识模块。考虑到配置Knowledge模块可能对大多数用户来说过于复杂,知识库还提供了一个简单的函数调用来创建Knowledge模块。 + +* `KnowledgeBank.add_data_as_knowledge`: 创建Knowledge模块。一种简单的方式只需要提供knowledge_id、emb_model_name和data_dirs_and_types。 + 因为`KnowledgeBank`默认生成的是 `LlamaIndexKnowledge`, 所以所有文本类文件都可以支持,包括`.txt`, `.html`, `.md` ,`.csv`,`.pdf`和 所有代码文件(如`.py`). 其他支持的文件类型可以参考 [LlamaIndex document](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/). + ```python + knowledge_bank.add_data_as_knowledge( + knowledge_id="agentscope_tutorial_rag", + emb_model_name="qwen_emb_config", + data_dirs_and_types={ + "../../docs/sphinx_doc/en/source/tutorial": [".md"], + }, + ) + ``` + 对于更高级的初始化,用户仍然可以将一个知识模块配置作为参数knowledge_config传递: + ```python + # load knowledge_config as dict + knowledge_bank.add_data_as_knowledge( + knowledge_id=knowledge_config["knowledge_id"], + emb_model_name=knowledge_config["emb_model_config_name"], + knowledge_config=knowledge_config, + ) + ``` +* `KnowledgeBank.get_knowledge`: 它接受两个参数,knowledge_id和duplicate。 + 它返回提供的knowledge_id对应的知识对象;如果duplicate为true,则返回该对象的深拷贝。 +* `KnowledgeBank.equip`: 它接受三个参数,`agent`,`knowledge_id_list` 和`duplicate`。 +该函数会根据`knowledge_id_list`为`agent`提供相应的知识(放入`agent.knowledge_list`)。`duplicate` 同样决定是否是深拷贝。 + + + +### RAG 智能体 +RAG 智能体是可以基于检索到的知识生成答案的智能体。 + * 让智能体使用RAG: RAG agent配有一个`knowledge_list`的列表 + * 可以在初始化时就给RAG agent传入`knowledge_list` + ```python + knowledge = knowledge_bank.get_knowledge(knowledge_id) + agent = LlamaIndexAgent( + name="rag_worker", + sys_prompt="{your_prompt}", + model_config_name="{your_model}", + knowledge_list=[knowledge], # provide knowledge object directly + similarity_top_k=3, + log_retrieval=False, + recent_n_mem_for_retrieve=1, + ) + ``` + * 如果通过配置文件来批量启动agent,也可以给agent提供`knowledge_id_list`。这样也可以通过将agent和它的`knowledge_id_list`一起传入`KnowledgeBank.equip`来为agent赋予`knowledge_list`。 + ```python + # >>> agent.knowledge_list + # >>> [] 
+ knowledge_bank.equip(agent, agent.knowledge_id_list) + # >>> agent.knowledge_list + # [] + ``` + * Agent 智能体可以在`reply`函数中使用从`Knowledge`中检索到的信息,将其提示组合到LLM的提示词中。 + +**自己搭建 RAG 智能体.** 只要您的智能体配置具有`knowledge_id_list`,您就可以将一个agent和这个列表传递给`KnowledgeBank.equip`;这样该agent就是被装配`knowledge_id`。 +您可以在`reply`函数中自己决定如何从`Knowledge`对象中提取和使用信息,甚至通过`Knowledge`修改知识库。 + + +## (拓展) 架设自己的embedding model服务 + +我们在此也对架设本地embedding model感兴趣的用户提供以下的样例。 +以下样例基于在embedding model范围中很受欢迎的`sentence_transformers` 包(基于`transformer` 而且兼容HuggingFace和ModelScope的模型)。 +这个样例中,我们会使用当下最好的文本向量模型之一`gte-Qwen2-7B-instruct`。 + + +* 第一步: 遵循在 [HuggingFace](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct) 或者 [ModelScope](https://www.modelscope.cn/models/iic/gte_Qwen2-7B-instruct )的指示下载模型。 + (如果无法直接从HuggingFace下载模型,也可以考虑使用HuggingFace镜像:bash命令行`export HF_ENDPOINT=https://hf-mirror.com`,或者在Python代码中加入`os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"`) +* 第二步: 设置服务器。以下是一段参考代码。 + +```python +import datetime +import argparse + +from flask import Flask +from flask import request +from sentence_transformers import SentenceTransformer + +def create_timestamp(format_: str = "%Y-%m-%d %H:%M:%S") -> str: + """Get current timestamp.""" + return datetime.datetime.now().strftime(format_) + +app = Flask(__name__) + +@app.route("/embedding/", methods=["POST"]) +def get_embedding() -> dict: + """Receive post request and return response""" + json = request.get_json() + + inputs = json.pop("inputs") + + global model + + if isinstance(inputs, str): + inputs = [inputs] + + embeddings = model.encode(inputs) + + return { + "data": { + "completion_tokens": 0, + "messages": {}, + "prompt_tokens": 0, + "response": { + "data": [ + { + "embedding": emb.astype(float).tolist(), + } + for emb in embeddings + ], + "created": "", + "id": create_timestamp(), + "model": "flask_model", + "object": "text_completion", + "usage": { + "completion_tokens": 0, + "prompt_tokens": 0, + "total_tokens": 0, + }, + }, + "total_tokens": 0, + 
"username": "", + }, + } + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model_name_or_path", type=str, required=True) + parser.add_argument("--device", type=str, default="auto") + parser.add_argument("--port", type=int, default=8000) + args = parser.parse_args() + + global model + + print("setting up for embedding model....") + model = SentenceTransformer( + args.model_name_or_path + ) + + app.run(port=args.port) +``` + +* 第三部:启动服务器。 +```bash +python setup_ms_service.py --model_name_or_path {$PATH_TO_gte_Qwen2_7B_instruct} +``` + + +测试服务是否成功启动。 +```python +from agentscope.models.post_model import PostAPIEmbeddingWrapper + + +model = PostAPIEmbeddingWrapper( + config_name="test_config", + api_url="http://127.0.0.1:8000/embedding/", + json_args={ + "max_length": 4096, + "temperature": 0.5 + } +) + +print(model("testing")) +``` \ No newline at end of file diff --git a/docs/tutorial/zh/source/tutorial/streaming.py b/docs/tutorial/zh/source/tutorial/streaming.py new file mode 100644 index 000000000..97890ac00 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/streaming.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +""" +.. _streaming: + +流式输出 +========================= + +AgentScope 支持在终端和 AgentScope Studio 中以打字机效果显示流式输出。 + +.. 
list-table:: + :header-rows: 1 + + * - API + - 类 + - 是否支持流式输出 + * - OpenAI Chat API + - `OpenAIChatWrapper` + - ✓ + * - DashScope Chat API + - `DashScopeChatWrapper` + - ✓ + * - Gemini Chat API + - `GeminiChatWrapper` + - ✓ + * - ZhipuAI Chat API + - `ZhipuAIChatWrapper` + - ✓ + * - Ollama Chat API + - `OllamaChatWrapper` + - ✓ + * - LiteLLM Chat API + - `LiteLLMChatWrapper` + - ✓ + * - Anthropic Chat API + - `AnthropicChatWrapper` + - ✓ + +本节将展示如何在 AgentScope 中启用流式输出,以及如何在智能体中处理流式返回。 +""" + +# %% +# 启用流式输出 +# ---------------------------- +# +# 通过设置模型类的 `stream` 参数,启用流式输出。 +# 你可以在初始化或配置中直接指定`stream`参数。 +# +# - 在初始化中指定 +# + +from agentscope.models import DashScopeChatWrapper +import os + +model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + api_key=os.environ["DASHSCOPE_API_KEY"], + stream=True, # 启用流式输出 +) + +# %% +# - 在模型配置中指定 + +model_config = { + "model_type": "dashscope_chat", + "config_name": "qwen_config", + "model_name": "qwen-max", + "stream": True, +} + +# %% +# 使用上述模型配置,我们可以在 AgentScope 中使用内置智能体获取流式输出。 +# +# 接下来,我们展示如何在智能体中处理流式输出。 + +# %% +# 处理流式响应 +# ------------------------------------------- +# +# 一旦我们启用了流式输出,模型返回对象中的 `stream` 字段将包含一个生成器。 +# + +prompt = [{"role": "user", "content": "Hi!"}] + +response = model(prompt) +print("response.stream的类型:", type(response.stream)) + +# %% +# 我们可以遍历生成器以获取流式文本。 +# 该生成器同时也会生成一个布尔值,标识当前是否为最后一个文本块。 + +for index, chunk in enumerate(response.stream): + print(f"{index}.", chunk) + print(f"当前text字段:", response.text, "\n") + +# %% +# .. note:: 注意 `response.stream` 挂载的生成器是增量的,并且只能使用一次。 +# 在遍历过程中,`response` 的 `text` 字段会自动拼接字符串。 +# 为了与非流式模式兼容,你也可以直接使用`response.text`一次获取所有文本。 + +prompt = [{"role": "user", "content": "Hi!"}] +response = model(prompt) +# 一次性获取所有文本 +print(response.text) + +# %% +# 打字机效果 +# ------------------------------------------- +# 为了实现打字机的显示效果,AgentScope 在 `AgentBase` 类中提供了一个 `speak` 函数。 +# 如果给定了一个生成器,`speak` 函数会遍历生成器并在终端或 AgentScope Studio 中以打字机效果打印文本。 +# +# .. 
code-block:: python +# +# def reply(*args, **kwargs): +# # ... +# self.speak(response.stream) +# # ... +# +# 为了使一套代码同时兼容流式和非流式模式,AgentScope 的所有内置智能体中使用以下代码片段。 +# +# .. code-block:: python +# +# def reply(*args, **kwargs): +# # ... +# self.speak(response.stream or response.text) +# # ... +# diff --git a/docs/tutorial/zh/source/tutorial/structured_output.py b/docs/tutorial/zh/source/tutorial/structured_output.py new file mode 100644 index 000000000..5cadd106a --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/structured_output.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- +""" +.. _structured-output: + +结构化输出 +========================== + +在本教程中,我们将构建一个简单的智能体,使用 `agentscope.parsers` 模块以 JSON 字典格式输出结构化数据。 +""" +from agentscope.models import ModelResponse + +# %% +# 定义解析器 +# ------------------- + +from agentscope.parsers import MarkdownJsonDictParser + + +parser = MarkdownJsonDictParser( + content_hint='{"thought": "你的想法", "speak": "你对用户说的话"}', + required_keys=["thought", "speak"], +) + + +# %% +# 解析器将根据你的输入生成一个格式说明。你可以在提示中使用 `format_instruction` 属性来指导 LLM 生成所需的输出。 + +print(parser.format_instruction) + +# %% +# 解析输出 +# ------------------- +# 当从 LLM 接收到输出时,使用 `parse` 方法来提取结构化数据。 +# 它接受一个 `agentscope.models.ModelResponse` 对象作为输入,解析 `text` 字段的值,并在 `parsed` 字段中返回解析后的字典。 + +dummy_response = ModelResponse( + text="""```json +{ + "thought": "我应该向用户打招呼", + "speak": "嗨!我能为您做些什么?" 
+} +```""", +) + +print(f"解析前parsed字段: {dummy_response.parsed}") + +parsed_response = parser.parse(dummy_response) + +print(f"解析后parsed字段: {parsed_response.parsed}") +print(type(parsed_response.parsed)) + +# %% +# 错误处理 +# ------------------- +# 如果LLM的输出与预期格式不匹配,解析器将抛出一个包含详细信息的错误。 +# 因此开发人员可以将错误消息呈现给 LLM,以指导它纠正输出。 +# + +error_response = ModelResponse( + text="""```json +{ + "thought": "我应该向用户打招呼" +} +```""", +) + +try: + parsed_response = parser.parse(error_response) +except Exception as e: + print(e) + +# %% +# 进阶用法 +# ------------------- +# 复杂结构化输出 +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# 要求 LLM 直接生成 JSON 字典可能具有挑战性,特别是当 JSON 内容很复杂时(例如代码片段、嵌套结构)。 +# 在这种情况下,你可以使用更高级的解析器来指导 LLM 生成所需的输出。 +# 这里是一个更复杂的解析器示例,可以处理代码片段。 +# + +from agentscope.parsers import RegexTaggedContentParser + +parser = RegexTaggedContentParser( + format_instruction="""按以下格式作答: +<thought>你的想法</thought> +<number>这里放一个随机数字</number> +<code>你的python代码</code> +""", + try_parse_json=True, # 会尝试将每个键值解析为JSON对象,如果失败则保留为字符串 + required_keys=[ # 解析字典中的必需键 + "thought", + "number", + "code", + ], +) + +print(parser.format_instruction) + +# %% +# `RegexTaggedContentParser` 支持使用正则表达式匹配文本中的标记内容并返回解析后的字典。 +# +# .. 
note:: `RegexTaggedContentParser`的解析输出是一个字典,这意味着必需键应该是唯一的。 +# 你也可以在初始化解析器时通过设置 `tagged_content_pattern` 参数来更改正则表达式模式。 + +import json + +dummy_response = ModelResponse( + text="""<thought>打印当前日期</thought> +<number>42</number> +<code>import datetime +print(datetime.datetime.now()) +</code> +""", +) + +parsed_response = parser.parse(dummy_response) + +print("解析响应的类型: ", type(parsed_response.parsed)) +print("number的类型: ", type(parsed_response.parsed["number"])) +print(json.dumps(parsed_response.parsed, indent=4, ensure_ascii=False)) + +# %% +# 自动后处理 +# ^^^^^^^^^^^^^^^^^^^^ +# +# 在解析后的字典中,不同的键可能需要不同的后处理步骤。 +# 例如,在狼人杀游戏中,LLM 扮演预言家的角色,输出应该包含以下键值: +# +# - `thought`: 预言家的想法 +# - `speak`: 预言家的发言 +# - `use_ability`: 一个布尔值,表示预言家是否应该使用其能力 +# +# 在这种情况下,`thought` 和 `speak` 内容应该存储在智能体的记忆中,以确保智能体行为/策略的一致性。 +# `speak` 内容应该暴露给其它智能体或玩家。 +# `use_ability` 键应该能在主流程中访问到,从而确定游戏下一步的操作(例如是否使用能力)。 +# +# AgentScope 通过以下参数来自动对解析后的字典进行后处理。 +# +# - `keys_to_memory`: 应存储在智能体记忆中的键 +# - `keys_to_content`: 应存储在返回消息的 content 字段中的键,会暴露给其它智能体 +# - `keys_to_metadata`: 应存储在返回消息的元数据(metadata)字段中的键 +# +# .. note:: 如果提供了一个字符串,解析器将从解析后的字典中提取给定键的值。如果提供了一个字符串列表,将创建一个包含给定键的子字典。 +# +# 下面是使用 `MarkdownJsonDictParser` 自动后处理解析后字典的示例。 +# + +parser = MarkdownJsonDictParser( + content_hint='{"thought": "你的想法", "speak": "你对用户说的话", "use_ability": "是否使用能力"}', + keys_to_memory=["thought", "speak"], + keys_to_content="speak", + keys_to_metadata="use_ability", +) + +dummy_response = ModelResponse( + text="""```json +{ + "thought": "我应该...", + "speak": "我不会使用我的能力", + "use_ability": false +}``` +""", +) + +parsed_response = parser.parse(dummy_response) + +print("解析后的响应: ", parsed_response.parsed) +print("存储到记忆", parser.to_memory(parsed_response.parsed)) +print("存储到消息 content 字段: ", parser.to_content(parsed_response.parsed)) +print("存储到消息 metadata 字段: ", parser.to_metadata(parsed_response.parsed)) + +# %% +# 这里我们展示如何创建一个智能体,它在 `reply` 方法中通过以下步骤实现自动化的后处理。 +# +# 1. 在提示中放入格式说明,以指导 LLM 生成所需的输出 +# 2. 解析 LLM 的返回值 +# 3. 使用 `to_memory`、`to_content` 和 `to_metadata` 方法后处理解析后的字典 +# +# .. 
tip:: 通过更改不同的解析器,智能体可以适应不同的场景,并以各种格式生成结构化输出。 +# + +from agentscope.models import DashScopeChatWrapper +from agentscope.agents import AgentBase +from agentscope.message import Msg + + +class Agent(AgentBase): + def __init__(self): + self.name = "Alice" + super().__init__(name=self.name) + + self.sys_prompt = f"你是一个名为{self.name}的有用助手。" + + self.model = DashScopeChatWrapper( + config_name="_", + model_name="qwen-max", + ) + + self.parser = MarkdownJsonDictParser( + content_hint='{"thought": "你的想法", "speak": "你对用户说的话", "use_ability": "是否使用能力"}', + keys_to_memory=["thought", "speak"], + keys_to_content="speak", + keys_to_metadata="use_ability", + ) + + self.memory.add(Msg("system", self.sys_prompt, "system")) + + def reply(self, msg): + self.memory.add(msg) + + prompt = self.model.format( + self.memory.get_memory(), + # 指示模型按要求的格式作答 + Msg("system", self.parser.format_instruction, "system"), + ) + + response = self.model(prompt) + + parsed_response = self.parser.parse(response) + + self.memory.add( + Msg( + name=self.name, + content=self.parser.to_memory(parsed_response.parsed), + role="assistant", + ), + ) + + return Msg( + name=self.name, + content=self.parser.to_content(parsed_response.parsed), + role="assistant", + metadata=self.parser.to_metadata(parsed_response.parsed), + ) diff --git a/docs/tutorial/zh/source/tutorial/tool.py b/docs/tutorial/zh/source/tutorial/tool.py new file mode 100644 index 000000000..50bfb1e0c --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/tool.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +""" +.. 
_tools: + +工具 +==================== + +在本教程中,我们将展示如何使用 AgentScope 中内置的工具函数,以及如何创建自定义工具函数。 +""" +import json + +import agentscope +from agentscope.message import Msg + +# %% +# 内置工具函数 +# -------------------------- +# AgentScope 提供了一个 `ServiceToolkit` 模块,支持以下功能: +# +# - 工具介绍生成, +# - 提供一套默认的调用格式, +# - 模型返回值解析、工具调用和面向智能体的错误处理。 +# +# 在使用 `ServiceToolkit` 之前,我们可以先看一下 `agentscope.service` 模块中可用的工具。 +# + +from agentscope.service import get_help, ServiceResponse, ServiceExecStatus + +get_help() + +# %% +# 以上所有函数都是用 Python 函数实现的。 +# 可以通过调用 `add` 方法注册到 `ServiceToolkit` 中。 +# + +from agentscope.service import ServiceToolkit +from agentscope.service import bing_search, execute_shell_command + +toolkit = ServiceToolkit() +toolkit.add(execute_shell_command) + +# 注意,一些工具函数的参数(例如 api_key)应该由开发人员处理。 +# 你可以直接在 add 方法中以关键字参数的形式传递这些参数,保留其他参数留给智能体填写。 + +toolkit.add(bing_search, api_key="xxx") + +print("工具说明:") +print(toolkit.tools_instruction) + +# %% +# 内置的默认调用格式: +# + +print(toolkit.tools_calling_format) + +# %% +# 自动生成的工具函数 JSON Schema 格式说明: +# +print(json.dumps(toolkit.json_schemas, indent=2)) + + +# %% +# AgentScope 提供了 `ReActAgent` 智能体类来使用工具,只需要将 `ServiceToolkit` 对象传递给这个智能体。 +# 有关该智能体的实现细节,请参阅 :ref:`builtin_agent`。 +# + +from agentscope.agents import ReActAgent + +agentscope.init( + model_configs={ + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max", + }, +) + +agent = ReActAgent( + name="Friday", + model_config_name="my-qwen-max", + service_toolkit=toolkit, + sys_prompt="你是一个名为 Friday 的助手。", +) + +msg_task = Msg("user", "帮我计算一下 1615114134*4343434343", "user") + +res = agent(msg_task) + + +# %% +# 创建工具函数 +# -------------------------- +# 自定义工具函数必须遵循以下规则: +# +# - 参数使用 typing 指定类型 +# - 使用 Google 风格书写完整的 docstring +# - 函数返回值必须用 `ServiceResponse` 包装 +# + + +def new_function(arg1: str, arg2: int) -> ServiceResponse: + """简单介绍该函数。 + + Args: + arg1 (`str`): + 对 arg1 的简单描述 + arg2 (`int`): + 对 arg2 的简单描述 + """ + return ServiceResponse( + 
status=ServiceExecStatus.SUCCESS, + content="完成!", + ) diff --git a/docs/tutorial/zh/source/tutorial/visual.py b/docs/tutorial/zh/source/tutorial/visual.py new file mode 100644 index 000000000..e24332b04 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/visual.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: sphinx +# format_version: '1.1' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +""" +.. _visual-interface: + +可视化 +========================= + +AgentScope 支持包括 Gradio 和 AgentScope Studio 在内的可视化,以提高用户体验。 + +Gradio +~~~~~~~~~~~~~~~~~~~~~~ + +首先,请确保已安装完整版本的 AgentScope, 其中包含 Gradio 包。 + +.. code-block:: bash + + # From pypi + pip install agentscope[full] + + # From source code + cd agentscope + pip install .[full] + + +之后,请确保您的应用程序被封装在一个 `main` 函数中。 + +.. code-block:: python + + from agentscope.agents import DialogAgent, UserAgent + import agentscope + + + def main(): + # Your code here + agentscope.init(model_configs={ + "config_name": "my-qwen-max", + "model_type": "dashscope_chat", + "model_name": "qwen-max" + }) + + agent = DialogAgent( + name="Alice", + model_config_name="my-qwen-max", + sys_prompt="You're a helpful assistant named Alice." + ) + user = UserAgent() + + msg = None + while True: + msg = agent(msg) + msg = user(msg) + if msg.content == "exit": + break + + +然后在终端执行以下命令启动 Gradio UI: + +.. code-block :: bash + + as_gradio {path_to_your_python_code} + +最后,您可以访问 Gradio UI,如下所示: + +.. 
image:: https://img.alicdn.com/imgextra/i1/O1CN0181KSfH1oNbfzjUAVT_!!6000000005213-0-tps-3022-1530.jpg + :align: center + :class: bordered-image + +------------------------------ + +AgentScope Studio +~~~~~~~~~~~~~~~~~~ + +AgentScope Studio 是一个开源的 Web UI 工具包,用于构建和监控多智能体应用程序。它提供以下功能: + +* **仪表板**: 一个用于监控正在运行的应用程序,查看、管理运行历史的界面。 + +* **工作站**: 一个拖拽式构建应用的低代码开发界面。 + +* **服务器管理器**: 一个用于管理大规模分布式应用的界面。 + +* **画廊**: 工作站中应用程序示例。(即将推出!) + +.. _studio: + +启动 AgentScope Studio +---------------------------- + +要启动 Studio,首先确保您已安装最新版本的 AgentScope。然后运行以下 Python 代码: + +.. code-block:: python + + import agentscope + agentscope.studio.init() + +或者可以在终端中运行以下命令: + +.. code-block :: bash + + as_studio + +之后,可以访问 http://127.0.0.1:5000 上的 AgentScope Studio,将显示以下页面: + +.. image:: https://img.alicdn.com/imgextra/i3/O1CN01Xic0GQ1ZkJ4M0iD8F_!!6000000003232-0-tps-3452-1610.jpg + :align: center + :class: bordered-image + +当然,也可以更改主机和端口,并通过提供以下参数链接到你的应用程序运行历史记录: + +.. code-block:: python + + import agentscope + + agentscope.studio.init( + host="127.0.0.1", # AgentScope Studio的IP地址 + port=5000, # AgentScope Studio的端口号 + run_dirs = [ # 应用运行历史的文件目录 + "xxx/xxx/runs", + "xxx/xxx/runs" + ] + ) + + +仪表板 +----------------- + +仪表板是一个 Web 界面,用于监控您正在运行的应用程序并查看运行历史记录。 + + +注意 +^^^^^^^^^^^^^^^^^^^^^ + +目前,仪表板存在以下限制,我们正在努力改进。欢迎任何反馈、贡献或建议! + +* 运行的应用程序和 AgentScope Studio 必须运行在同一台机器上,以保证"URL/路径一致性"。如果您想在其他机器上访问 AgentScope,您可以尝试通过在远程机器上运行以下命令来转发端口: + +.. code-block :: bash + + # 假设 AgentScope 运行在{as_host}:{as_port},远程机器的端口是{remote_machine_port} + ssh -L {remote_machine_port}:{as_host}:{as_port} [{user_name}@]{as_host} + +* 对于分布式应用程序,支持单机多进程模式,但尚不支持多机多进程模式。 + +注册应用程序 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +在启动 AgentScope Studio 后,可以通过在 `agentscope.init()` 中指定 `studio_url` 来注册正在运行的应用程序: + +.. code-block:: python + + import agentscope + + agentscope.init( + # ... 
+ project="xxx", + name="xxx", + studio_url="http://127.0.0.1:5000" # AgentScope Studio的URL + ) + +注册后,可以在仪表板中查看正在运行的应用程序。为了区分不同的应用程序,可以指定应用程序的项目和名称。 + +.. image:: https://img.alicdn.com/imgextra/i2/O1CN01zcUmuJ1I3OUXy1Q35_!!6000000000837-0-tps-3426-1718.jpg + :align: center + :class: bordered-image + +单击状态为 `waiting` 的程序,即可进入执行界面。例如,下图显示了一个对话界面。 + +.. image:: https://img.alicdn.com/imgextra/i3/O1CN01sA3VUc1h7OLKVLfr3_!!6000000004230-0-tps-3448-1736.jpg + :align: center + :class: bordered-image + + +.. note:: 一旦注册了正在运行的应用程序,`agentscope.agents.UserAgent` 类中的输入操作将转移到 AgentScope Studio 的仪表板。 + +导入运行历史记录 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +在 AgentScope 中,运行历史记录默认保存在 `./runs` 目录中。如果您想在仪表板中查看这些运行历史记录,可以在 `agentscope.studio.init()` 中指定 `run_dirs` : + + +.. code-block:: python + + import agentscope + + agentscope.studio.init( + run_dirs = ["xxx/runs"] + ) + +""" diff --git a/docs/tutorial/zh/source/tutorial/web_browser.py b/docs/tutorial/zh/source/tutorial/web_browser.py new file mode 100644 index 000000000..572798940 --- /dev/null +++ b/docs/tutorial/zh/source/tutorial/web_browser.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +""" +.. _web-browser-control: + +浏览器控制 +==================== + +本节重定向到 +`conversation_with_web_browser_agent/README.md +`_。 +""" From 3b42d1a406af91ad891b3fb5d5b4226bba7fbc8e Mon Sep 17 00:00:00 2001 From: DavdGao Date: Wed, 8 Jan 2025 20:50:19 +0800 Subject: [PATCH 23/23] Add Chinese path in .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f59dd25fe..383fcd698 100644 --- a/.gitignore +++ b/.gitignore @@ -135,6 +135,7 @@ dmypy.json # docs docs/tutorial/en/build/ +docs/tutorial/zh/build/ # Used to save loggings and files *runs/