From 4bb4ce85feb6324d336807f0e0db159f99a8c46e Mon Sep 17 00:00:00 2001 From: wwxxzz Date: Thu, 6 Jun 2024 16:21:42 +0800 Subject: [PATCH] support llm infer param: temperature (#52) * support llm infer param: temperature * modify style of retrieval chunks * support safe_html_content --- poetry.lock | 216 +++----------------------- pyproject.toml | 1 + src/pai_rag/app/api/models.py | 8 +- src/pai_rag/app/web/rag_client.py | 22 +-- src/pai_rag/app/web/tabs/chat_tab.py | 44 +++--- src/pai_rag/app/web/view_model.py | 3 + src/pai_rag/modules/llm/llm_module.py | 19 ++- 7 files changed, 76 insertions(+), 237 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4e338697..99f3cbcf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,5 @@ # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "accelerate" @@ -3111,6 +3112,21 @@ files = [ httpx = ">=0.20.0" pydantic = ">=1.10" +[[package]] +name = "markdown" +version = "3.6" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, + {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, +] + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -3870,17 +3886,6 @@ files = [ {file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"}, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.20.5" -description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false -python-versions = ">=3" -files = [ - {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, - {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, -] - [[package]] name = "nvidia-nvjitlink-cu12" version = "12.5.40" @@ -4002,9 +4007,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, ] [[package]] @@ -4025,9 +4030,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, ] [[package]] @@ -4394,8 +4399,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -6692,34 +6697,8 @@ name = "torch" version = "2.2.2" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false -python-versions = ">=3.8.0" -files = [ - {file = "torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bc889d311a855dd2dfd164daf8cc903a6b7273a747189cebafdd89106e4ad585"}, - {file = "torch-2.2.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:15dffa4cc3261fa73d02f0ed25f5fa49ecc9e12bf1ae0a4c1e7a88bbfaad9030"}, - {file = "torch-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:11e8fe261233aeabd67696d6b993eeb0896faa175c6b41b9a6c9f0334bdad1c5"}, - {file = "torch-2.2.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:b2e2200b245bd9f263a0d41b6a2dab69c4aca635a01b30cca78064b0ef5b109e"}, - {file = "torch-2.2.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:877b3e6593b5e00b35bbe111b7057464e76a7dd186a287280d941b564b0563c2"}, - {file = "torch-2.2.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:ad4c03b786e074f46606f4151c0a1e3740268bcf29fbd2fdf6666d66341c1dcb"}, - {file = "torch-2.2.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:32827fa1fbe5da8851686256b4cd94cc7b11be962862c2293811c94eea9457bf"}, - {file = "torch-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:f9ef0a648310435511e76905f9b89612e45ef2c8b023bee294f5e6f7e73a3e7c"}, - {file = "torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:95b9b44f3bcebd8b6cd8d37ec802048c872d9c567ba52c894bba90863a439059"}, - {file = "torch-2.2.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:49aa4126ede714c5aeef7ae92969b4b0bbe67f19665106463c39f22e0a1860d1"}, - {file = "torch-2.2.2-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:cf12cdb66c9c940227ad647bc9cf5dba7e8640772ae10dfe7569a0c1e2a28aca"}, - {file = "torch-2.2.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:89ddac2a8c1fb6569b90890955de0c34e1724f87431cacff4c1979b5f769203c"}, - {file = "torch-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:451331406b760f4b1ab298ddd536486ab3cfb1312614cfe0532133535be60bea"}, - {file = "torch-2.2.2-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:eb4d6e9d3663e26cd27dc3ad266b34445a16b54908e74725adb241aa56987533"}, - {file = "torch-2.2.2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:bf9558da7d2bf7463390b3b2a61a6a3dbb0b45b161ee1dd5ec640bf579d479fc"}, - {file = "torch-2.2.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cd2bf7697c9e95fb5d97cc1d525486d8cf11a084c6af1345c2c2c22a6b0029d0"}, - {file = "torch-2.2.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b421448d194496e1114d87a8b8d6506bce949544e513742b097e2ab8f7efef32"}, - {file = "torch-2.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:3dbcd563a9b792161640c0cffe17e3270d85e8f4243b1f1ed19cca43d28d235b"}, - {file = "torch-2.2.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:31f4310210e7dda49f1fb52b0ec9e59382cfcb938693f6d5378f25b43d7c1d29"}, - {file = "torch-2.2.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c795feb7e8ce2e0ef63f75f8e1ab52e7fd5e1a4d7d0c31367ade1e3de35c9e95"}, - {file = "torch-2.2.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a6e5770d68158d07456bfcb5318b173886f579fdfbf747543901ce718ea94782"}, - {file = "torch-2.2.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:67dcd726edff108e2cd6c51ff0e416fd260c869904de95750e80051358680d24"}, - {file = "torch-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:539d5ef6c4ce15bd3bd47a7b4a6e7c10d49d4d21c0baaa87c7d2ef8698632dfb"}, - {file = "torch-2.2.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:dff696de90d6f6d1e8200e9892861fd4677306d0ef604cb18f2134186f719f82"}, - {file = "torch-2.2.2-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:3a4dd910663fd7a124c056c878a52c2b0be4a5a424188058fe97109d4436ee42"}, -] +python-versions = "*" +files = [] [package.dependencies] filelock = "*" @@ -6741,64 +6720,6 @@ sympy = "*" triton = {version = "2.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} typing-extensions = ">=4.8.0" -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.9.1)"] - -[[package]] -name = "torch" -version = "2.3.0" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d8ea5a465dbfd8501f33c937d1f693176c9aef9d1c1b0ca1d44ed7b0a18c52ac"}, - {file = "torch-2.3.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:09c81c5859a5b819956c6925a405ef1cdda393c9d8a01ce3851453f699d3358c"}, - {file = "torch-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:1bf023aa20902586f614f7682fedfa463e773e26c58820b74158a72470259459"}, - {file = "torch-2.3.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:758ef938de87a2653bba74b91f703458c15569f1562bf4b6c63c62d9c5a0c1f5"}, - {file = "torch-2.3.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:493d54ee2f9df100b5ce1d18c96dbb8d14908721f76351e908c9d2622773a788"}, - {file = "torch-2.3.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:bce43af735c3da16cc14c7de2be7ad038e2fbf75654c2e274e575c6c05772ace"}, - {file = "torch-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:729804e97b7cf19ae9ab4181f91f5e612af07956f35c8b2c8e9d9f3596a8e877"}, - {file = "torch-2.3.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:d24e328226d8e2af7cf80fcb1d2f1d108e0de32777fab4aaa2b37b9765d8be73"}, - {file = "torch-2.3.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:b0de2bdc0486ea7b14fc47ff805172df44e421a7318b7c4d92ef589a75d27410"}, - {file = "torch-2.3.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a306c87a3eead1ed47457822c01dfbd459fe2920f2d38cbdf90de18f23f72542"}, - {file = "torch-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9b98bf1a3c8af2d4c41f0bf1433920900896c446d1ddc128290ff146d1eb4bd"}, - {file = "torch-2.3.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:dca986214267b34065a79000cee54232e62b41dff1ec2cab9abc3fc8b3dee0ad"}, - {file = "torch-2.3.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:20572f426965dd8a04e92a473d7e445fa579e09943cc0354f3e6fef6130ce061"}, - {file = "torch-2.3.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e65ba85ae292909cde0dde6369826d51165a3fc8823dc1854cd9432d7f79b932"}, - {file = "torch-2.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:5515503a193781fd1b3f5c474e89c9dfa2faaa782b2795cc4a7ab7e67de923f6"}, - {file = "torch-2.3.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:6ae9f64b09516baa4ef890af0672dc981c20b1f0d829ce115d4420a247e88fba"}, - {file = "torch-2.3.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:cd0dc498b961ab19cb3f8dbf0c6c50e244f2f37dbfa05754ab44ea057c944ef9"}, - {file = "torch-2.3.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e05f836559251e4096f3786ee99f4a8cbe67bc7fbedba8ad5e799681e47c5e80"}, - {file = "torch-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:4fb27b35dbb32303c2927da86e27b54a92209ddfb7234afb1949ea2b3effffea"}, - {file = "torch-2.3.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:760f8bedff506ce9e6e103498f9b1e9e15809e008368594c3a66bf74a8a51380"}, -] - -[package.dependencies] -filelock = "*" -fsspec = "*" -jinja2 = "*" -mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""} -networkx = "*" -nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -sympy = "*" -triton = {version = "2.3.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} -typing-extensions = ">=4.8.0" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.9.1)"] - [[package]] name = "torch" version = "2.3.0+cpu" @@ -6841,80 +6762,14 @@ name = "torchvision" version = "0.17.2" description = "image and video datasets and models for torch deep learning" optional = false -python-versions = ">=3.8" -files = [ - {file = "torchvision-0.17.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:1f2910fe3c21ad6875b2720d46fad835b2e4b336e9553d31ca364d24c90b1d4f"}, - {file = "torchvision-0.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ecc1c503fa8a54fbab777e06a7c228032b8ab78efebf35b28bc8f22f544f51f1"}, - {file = "torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:f400145fc108833e7c2fc28486a04989ca742146d7a2a2cc48878ebbb40cdbbd"}, - {file = "torchvision-0.17.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e9e4bed404af33dfc92eecc2b513d21ddc4c242a7fd8708b3b09d3a26aa6f444"}, - {file = "torchvision-0.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:ba2e62f233eab3d42b648c122a3a29c47cc108ca314dfd5cbb59cd3a143fd623"}, - {file = "torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:9b83e55ee7d0a1704f52b9c0ac87388e7a6d1d98a6bde7b0b35f9ab54d7bda54"}, - {file = "torchvision-0.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e031004a1bc432c980a7bd642f6c189a3efc316e423fc30b5569837166a4e28d"}, - {file = "torchvision-0.17.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:3bbc24b7713e8f22766992562547d8b4b10001208d372fe599255af84bfd1a69"}, - {file = "torchvision-0.17.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:833fd2e4216ced924c8aca0525733fe727f9a1af66dfad7c5be7257e97c39678"}, - {file = "torchvision-0.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:6835897df852fad1015e6a106c167c83848114cbcc7d86112384a973404e4431"}, - {file = "torchvision-0.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:14fd1d4a033c325bdba2d03a69c3450cab6d3a625f85cc375781d9237ca5d04d"}, - {file = "torchvision-0.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9c3acbebbe379af112b62b535820174277b1f3eed30df264a4e458d58ee4e5b2"}, - {file = "torchvision-0.17.2-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:77d680adf6ce367166a186d2c7fda3a73807ab9a03b2c31a03fa8812c8c5335b"}, - {file = "torchvision-0.17.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:f1c9ab3152cfb27f83aca072cac93a3a4c4e4ab0261cf0f2d516b9868a4e96f3"}, - {file = "torchvision-0.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:3f784381419f3ed3f2ec2aa42fb4aeec5bf4135e298d1631e41c926e6f1a0dff"}, - {file = "torchvision-0.17.2-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:b83aac8d78f48981146d582168d75b6c947cfb0a7693f76e219f1926f6e595a3"}, - {file = "torchvision-0.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1ece40557e122d79975860a005aa7e2a9e2e6c350a03e78a00ec1450083312fd"}, - {file = "torchvision-0.17.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:32dbeba3987e20f2dc1bce8d1504139fff582898346dfe8ad98d649f97ca78fa"}, - {file = "torchvision-0.17.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:35ba5c1600c3203549d2316422a659bd20c0cfda1b6085eec94fb9f35f55ca43"}, - {file = "torchvision-0.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:2f69570f50b1d195e51bc03feffb7b7728207bc36efcfb1f0813712b2379d881"}, - {file = "torchvision-0.17.2-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:4868bbfa55758c8107e69a0e7dd5e77b89056035cd38b767ad5b98cdb71c0f0d"}, - {file = "torchvision-0.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:efd6d0dd0668e15d01a2cffadc74068433b32cbcf5692e0c4aa15fc5cb250ce7"}, - {file = "torchvision-0.17.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7dc85b397f6c6d9ef12716ce0d6e11ac2b803f5cccff6fe3966db248e7774478"}, - {file = "torchvision-0.17.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d506854c5acd69b20a8b6641f01fe841685a21c5406b56813184f1c9fc94279e"}, - {file = "torchvision-0.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:067095e87a020a7a251ac1d38483aa591c5ccb81e815527c54db88a982fc9267"}, -] +python-versions = "*" +files = [] [package.dependencies] numpy = "*" pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" torch = "2.2.2" -[package.extras] -scipy = ["scipy"] - -[[package]] -name = "torchvision" -version = "0.18.0" -description = "image and video datasets and models for torch deep learning" -optional = false -python-versions = ">=3.8" -files = [ - {file = "torchvision-0.18.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dd61628a3d189c6852a12dc5ed4cd2eece66d2d67f35a866cb16f1dcb06c8c62"}, - {file = "torchvision-0.18.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:493c45f9937dad37aa1b64b14da17c7a589c72b91adc4837d431009cfe29bd53"}, - {file = "torchvision-0.18.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:5337f6acfa1fe959d5cb340d01a00614d6b31ce7a4824ccb95435a85c5273b95"}, - {file = "torchvision-0.18.0-cp310-cp310-win_amd64.whl", hash = "sha256:bd8e6f3b5beb49965f15c461302488edfa3d8c2d01d3bb79b150d6fb62711e3a"}, - {file = "torchvision-0.18.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6896a52168befe1105fb3c9335287390ed227e71d1e4ec4d68b62e8a3099fc09"}, - {file = "torchvision-0.18.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:3d7955398d4ceaad77c487c2c44f6f7813112402c9bab8cd906d346005891048"}, - {file = "torchvision-0.18.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:e5a24d620cea14a4bb89f24aa2b506230c0a16a3ada57fc53ad80cfd256a2128"}, - {file = "torchvision-0.18.0-cp311-cp311-win_amd64.whl", hash = "sha256:6ad70ddfa879bda5ed886b2518fe562640e0059787cbd65cb2bffa7674541410"}, - {file = "torchvision-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:eb9d83c0e1dbb54ecb0fb04c87f786333e3a6fb8b9c400aca7c31081f9aa5707"}, - {file = "torchvision-0.18.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:b657d052d146f24cb3b2a78219bfc82ae70a9706671c50f632528907d10cccec"}, - {file = "torchvision-0.18.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a964afbc7ddf50a46b941477f6c35729b416deedd139756befd488245e2e226d"}, - {file = "torchvision-0.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:7c770f0f748e0b17f57c0297508d7254f686cdf03fc2e2949f422b20574f4c0f"}, - {file = "torchvision-0.18.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2115a1906c015f5da9ceedc40a983313b0fd6e2c8a17108a92991706f51f6987"}, - {file = "torchvision-0.18.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6323f7e5423ff2594d5891863b919deb9d0de95f01c36bf26fbd879036b6ed08"}, - {file = "torchvision-0.18.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:925d0a82cccf6f986c18b29b4392a942db65cbdb73c13a129c8493822eb9e36f"}, - {file = "torchvision-0.18.0-cp38-cp38-win_amd64.whl", hash = "sha256:95b42d0dc599b47a01530c7439a5751e67e45b85e3a67113989cf7c7c70f2039"}, - {file = "torchvision-0.18.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:75e22ecf44a13b8f95b8ad421c0261282d859c61816badaca1959e073ccdd691"}, - {file = "torchvision-0.18.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4c334b3e719ba0a9ba6e15d4aff1178f5e6d029174f346163fed525f0ccfffd3"}, - {file = "torchvision-0.18.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:36efd87001c6bee2383e043e46a025affb03179747c8f4777b9918527ffce756"}, - {file = "torchvision-0.18.0-cp39-cp39-win_amd64.whl", hash = "sha256:ccc292e093771d5baacf5535ac4416306b6b5f15676341cd4d010d8542eace25"}, -] - -[package.dependencies] -numpy = "*" -pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" -torch = "2.3.0" - -[package.extras] -scipy = ["scipy"] - [[package]] name = "torchvision" version = "0.18.0+cpu" @@ -7058,29 +6913,6 @@ build = ["cmake (>=3.20)", "lit"] tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] tutorials = ["matplotlib", "pandas", "tabulate", "torch"] -[[package]] -name = "triton" -version = "2.3.0" -description = "A language and compiler for custom Deep Learning operations" -optional = false -python-versions = "*" -files = [ - {file = "triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ce4b8ff70c48e47274c66f269cce8861cf1dc347ceeb7a67414ca151b1822d8"}, - {file = "triton-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c3d9607f85103afdb279938fc1dd2a66e4f5999a58eb48a346bd42738f986dd"}, - {file = "triton-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218d742e67480d9581bafb73ed598416cc8a56f6316152e5562ee65e33de01c0"}, - {file = "triton-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381ec6b3dac06922d3e4099cfc943ef032893b25415de295e82b1a82b0359d2c"}, - {file = "triton-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:038e06a09c06a164fef9c48de3af1e13a63dc1ba3c792871e61a8e79720ea440"}, - {file = "triton-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d8f636e0341ac348899a47a057c3daea99ea7db31528a225a3ba4ded28ccc65"}, -] - -[package.dependencies] -filelock = "*" - -[package.extras] -build = ["cmake (>=3.20)", "lit"] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] -tutorials = ["matplotlib", "pandas", "tabulate", "torch"] - [[package]] name = "typer" version = "0.12.3" @@ -7855,4 +7687,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [metadata] lock-version = "2.0" python-versions = ">=3.10.0,<3.12" -content-hash = "efc14eedd786e12d84883e0b2dd2319a4b538a1a88891d995cd4c3fde14da2c3" +content-hash = "a8a8fe2d43c0e7ba68e08a1bb71077070ae909c9225ec2e2fc35e47b30da4d9a" diff --git a/pyproject.toml b/pyproject.toml index aa7ce73b..bbfcfa56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ llama-index-llms-huggingface = "^0.2.0" pytest-asyncio = "^0.23.7" pytest-cov = "^5.0.0" xlrd = "^2.0.1" +markdown = "^3.6" [tool.poetry.scripts] pai_rag = "pai_rag.main:main" diff --git a/src/pai_rag/app/api/models.py b/src/pai_rag/app/api/models.py index af3dbecd..6cbe25c6 100644 --- a/src/pai_rag/app/api/models.py +++ b/src/pai_rag/app/api/models.py @@ -4,9 +4,7 @@ class RagQuery(BaseModel): question: str - topk: int | None = 3 - topp: float | None = 0.8 - temperature: float | None = 0.7 + temperature: float | None = 0.1 vector_topk: int | None = 3 score_threshold: float | None = 0.5 chat_history: List[Dict[str, str]] | None = None @@ -14,9 +12,7 @@ class RagQuery(BaseModel): class LlmQuery(BaseModel): question: str - topk: int | None = 3 - topp: float | None = 0.8 - temperature: float | None = 0.7 + temperature: float | None = 0.1 chat_history: List[Dict[str, str]] | None = None diff --git a/src/pai_rag/app/web/rag_client.py b/src/pai_rag/app/web/rag_client.py index 8d6ad92e..7d034377 100644 --- a/src/pai_rag/app/web/rag_client.py +++ b/src/pai_rag/app/web/rag_client.py @@ -2,6 +2,8 @@ from typing import Any import requests +import html +import markdown cache_config = None @@ -55,11 +57,9 @@ def query_llm( self, text: str, session_id: str = None, - temperature: float = 0.7, - top_p: float = 0.8, - eas_llm_top_k: float = 30, + temperature: float = 0.1, ): - q = dict(question=text, topp=top_p, topk=eas_llm_top_k, temperature=temperature) + q = dict(question=text, temperature=temperature) r = requests.post(self.llm_url, headers={"X-Session-ID": session_id}, json=q) r.raise_for_status() @@ -76,12 +76,14 @@ def query_vector(self, text: str): session_id = r.headers["x-session-id"] response = dotdict(json.loads(r.text)) response.session_id = session_id - formatted_text = "\n\n".join( - [ - f"""[Doc {i+1}] [score: {doc["score"]}]\n{doc["text"]}""" - for i, doc in enumerate(response["docs"]) - ] - ) + formatted_text = "DocumentScoreText\n" + for i, doc in enumerate(response["docs"]): + html_content = markdown.markdown(doc["text"]) + safe_html_content = html.escape(html_content).replace("\n", "
") + formatted_text += 'Doc {}{}{}\n'.format( + i + 1, doc["score"], safe_html_content + ) + formatted_text = "\n\n" + formatted_text + "\n
" response["answer"] = formatted_text return response diff --git a/src/pai_rag/app/web/tabs/chat_tab.py b/src/pai_rag/app/web/tabs/chat_tab.py index ea654657..c6eee69d 100644 --- a/src/pai_rag/app/web/tabs/chat_tab.py +++ b/src/pai_rag/app/web/tabs/chat_tab.py @@ -68,7 +68,9 @@ def create_chat_tab() -> Dict[str, Any]: ) with gr.Column(visible=True) as vs_col: - vec_model_argument = gr.Accordion("Parameters of Vector Retrieval") + vec_model_argument = gr.Accordion( + "Parameters of Vector Retrieval", open=False + ) with vec_model_argument: similarity_top_k = gr.Slider( @@ -101,38 +103,22 @@ def create_chat_tab() -> Dict[str, Any]: retrieval_mode, } with gr.Column(visible=True) as llm_col: - model_argument = gr.Accordion("Inference Parameters of LLM") + model_argument = gr.Accordion("Inference Parameters of LLM", open=False) with model_argument: include_history = gr.Checkbox( label="Chat history", info="Query with chat history.", elem_id="include_history", ) - llm_topk = gr.Slider( - minimum=0, - maximum=100, - step=1, - value=30, - elem_id="llm_topk", - label="Top K (choose between 0 and 100)", - ) - llm_topp = gr.Slider( - minimum=0, - maximum=1, - step=0.01, - value=0.8, - elem_id="llm_topp", - label="Top P (choose between 0 and 1)", - ) llm_temp = gr.Slider( minimum=0, maximum=1, - step=0.01, - value=0.7, - elem_id="llm_temp", + step=0.001, + value=0.1, + elem_id="llm_temperature", label="Temperature (choose between 0 and 1)", ) - llm_args = {llm_topk, llm_topp, llm_temp, include_history} + llm_args = {llm_temp, include_history} with gr.Column(visible=True) as lc_col: prm_type = gr.Radio( @@ -198,26 +184,32 @@ def change_query_radio(query_type): if query_type == "Retrieval": return { vs_col: gr.update(visible=True), + vec_model_argument: gr.update(open=True), llm_col: gr.update(visible=False), + model_argument: gr.update(open=False), lc_col: gr.update(visible=False), } elif query_type == "LLM": return { vs_col: gr.update(visible=False), + vec_model_argument: gr.update(open=False), llm_col: gr.update(visible=True), + model_argument: gr.update(open=True), lc_col: gr.update(visible=False), } elif query_type == "RAG (Retrieval + LLM)": return { vs_col: gr.update(visible=True), + vec_model_argument: gr.update(open=False), llm_col: gr.update(visible=True), + model_argument: gr.update(open=False), lc_col: gr.update(visible=True), } query_type.change( fn=change_query_radio, inputs=query_type, - outputs=[vs_col, llm_col, lc_col], + outputs=[vs_col, vec_model_argument, llm_col, model_argument, lc_col], ) with gr.Column(scale=8): @@ -239,6 +231,12 @@ def change_query_radio(query_type): [question, chatbot, cur_tokens], api_name="respond", ) + question.submit( + respond, + chat_args, + [question, chatbot, cur_tokens], + api_name="respond", + ) clearBtn.click(clear_history, [chatbot], [chatbot, cur_tokens]) return { similarity_top_k.elem_id: similarity_top_k, diff --git a/src/pai_rag/app/web/view_model.py b/src/pai_rag/app/web/view_model.py index c13c9dc5..2b46c299 100644 --- a/src/pai_rag/app/web/view_model.py +++ b/src/pai_rag/app/web/view_model.py @@ -35,6 +35,7 @@ class ViewModel(BaseModel): llm_eas_model_name: str = None llm_api_key: str = None llm_api_model_name: str = None + llm_temperature: float = 0.1 # chunking parser_type: str = "Sentence" @@ -115,6 +116,7 @@ def sync_app_config(self, config): self.llm_eas_url = config["llm"].get("endpoint", self.llm_eas_url) self.llm_eas_token = config["llm"].get("token", self.llm_eas_token) self.llm_api_key = config["llm"].get("api_key", self.llm_api_key) + self.llm_temperature = config["llm"].get("temperature", self.llm_temperature) if self.llm == "PaiEAS": self.llm_eas_model_name = config["llm"].get("name", self.llm_eas_model_name) else: @@ -217,6 +219,7 @@ def to_app_config(self): config["llm"]["endpoint"] = self.llm_eas_url config["llm"]["token"] = self.llm_eas_token config["llm"]["api_key"] = self.llm_api_key + config["llm"]["temperature"] = self.llm_temperature if self.llm == "PaiEas": config["llm"]["name"] = self.llm_eas_model_name else: diff --git a/src/pai_rag/modules/llm/llm_module.py b/src/pai_rag/modules/llm/llm_module.py index d5417622..09482cbf 100644 --- a/src/pai_rag/modules/llm/llm_module.py +++ b/src/pai_rag/modules/llm/llm_module.py @@ -24,13 +24,13 @@ def _create_new_instance(self, new_params: Dict[str, Any]): f""" [Parameters][LLM:OpenAI] model = {config.get("name", "gpt-3.5-turbo")}, - temperature = {config.get("temperature", 0.5)}, + temperature = {config.get("temperature", 0.1)}, system_prompt = {config.get("system_prompt", "Please answer in Chinese.")} """ ) llm = OpenAI( model=config.get("name", "gpt-3.5-turbo"), - temperature=config.get("temperature", 0.5), + temperature=config.get("temperature", 0.1), system_prompt=config.get("system_prompt", "Please answer in Chinese."), api_key=config.get("api_key", None), ) @@ -39,13 +39,13 @@ def _create_new_instance(self, new_params: Dict[str, Any]): f""" [Parameters][LLM:AzureOpenAI] model = {config.get("name", "gpt-35-turbo")}, - temperature = {config.get("temperature", 0.5)}, + temperature = {config.get("temperature", 0.1)}, system_prompt = {config.get("system_prompt", "Please answer in Chinese.")} """ ) llm = AzureOpenAI( model=config.get("name", "gpt-35-turbo"), - temperature=config.get("temperature", 0.5), + temperature=config.get("temperature", 0.1), system_prompt=config.get("system_prompt", "Please answer in Chinese."), ) elif source == "dashscope": @@ -56,7 +56,9 @@ def _create_new_instance(self, new_params: Dict[str, Any]): model = {model_name} """ ) - llm = DashScope(model_name=model_name) + llm = DashScope( + model_name=model_name, temperature=config.get("temperature", 0.1) + ) elif source == "paieas": model_name = config["name"] endpoint = config["endpoint"] @@ -69,7 +71,12 @@ def _create_new_instance(self, new_params: Dict[str, Any]): token = {token} """ ) - llm = PaiEAS(endpoint=endpoint, token=token, model_name=model_name) + llm = PaiEAS( + endpoint=endpoint, + token=token, + model_name=model_name, + temperature=config.get("temperature", 0.1), + ) else: raise ValueError(f"Unknown LLM source: '{config['llm']['source']}'")