diff --git a/.github/check_and_update.py b/.github/check_and_update.py
new file mode 100644
index 000000000000..448844fdc10c
--- /dev/null
+++ b/.github/check_and_update.py
@@ -0,0 +1,79 @@
+import hashlib
+from huggingface_hub import hf_hub_download, get_paths_info
+import requests
+import sys
+import os
+
+uri = sys.argv[1]
+file_name = uri.split('/')[-1]
+
+# Function to parse the URI and determine download method
+def parse_uri(uri):
+    if uri.startswith('huggingface://'):
+        repo_id = uri.split('://')[1]
+        return 'huggingface', repo_id.rsplit('/', 1)[0]
+    elif 'huggingface.co' in uri:
+        parts = uri.split('/resolve/')
+        if len(parts) > 1:
+            repo_path = parts[0].split('https://huggingface.co/')[-1]
+            return 'huggingface', repo_path
+    return 'direct', uri
+
+def calculate_sha256(file_path):
+    sha256_hash = hashlib.sha256()
+    with open(file_path, 'rb') as f:
+        for byte_block in iter(lambda: f.read(4096), b''):
+            sha256_hash.update(byte_block)
+    return sha256_hash.hexdigest()
+
+def manual_safety_check_hf(repo_id):
+    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
+    scan = scanResponse.json()
+    if scan['hasUnsafeFile']:
+        return scan
+    return None
+
+download_type, repo_id_or_url = parse_uri(uri)
+
+new_checksum = None
+
+# Decide download method based on URI type
+if download_type == 'huggingface':
+    # Check if the repo is flagged as dangerous by HF
+    hazard = manual_safety_check_hf(repo_id_or_url)
+    if hazard != None:
+        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', file=sys.stderr)
+        sys.exit(5)
+    # Use HF API to pull sha
+    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
+        try:
+            new_checksum = file.lfs.sha256
+            break
+        except Exception as e:
+            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+            sys.exit(2)
+    if new_checksum is None:
+        try:
+            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
+        except Exception as e:
+            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+            sys.exit(2)
+else:
+    response = requests.get(repo_id_or_url)
+    if response.status_code == 200:
+        with open(file_name, 'wb') as f:
+            f.write(response.content)
+        file_path = file_name
+    elif response.status_code == 404:
+        print(f'File not found: {response.status_code}', file=sys.stderr)
+        sys.exit(2)
+    else:
+        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
+        sys.exit(1)
+
+if new_checksum is None:
+    new_checksum = calculate_sha256(file_path)
+    print(new_checksum)
+    os.remove(file_path)
+else:
+    print(new_checksum)
diff --git a/.github/checksum_checker.sh b/.github/checksum_checker.sh
index 01242af6e655..174e6d3f41a2 100644
--- a/.github/checksum_checker.sh
+++ b/.github/checksum_checker.sh
@@ -14,77 +14,14 @@ function check_and_update_checksum() {
     idx="$5"
 
     # Download the file and calculate new checksum using Python
-    new_checksum=$(python3 -c "
-import hashlib
-from huggingface_hub import hf_hub_download, get_paths_info
-import requests
-import sys
-import os
-
-uri = '$uri'
-file_name = uri.split('/')[-1]
-
-# Function to parse the URI and determine download method
-# Function to parse the URI and determine download method
-def parse_uri(uri):
-    if uri.startswith('huggingface://'):
-        repo_id = uri.split('://')[1]
-        return 'huggingface', repo_id.rsplit('/', 1)[0]
-    elif 'huggingface.co' in uri:
-        parts = uri.split('/resolve/')
-        if len(parts) > 1:
-            repo_path = parts[0].split('https://huggingface.co/')[-1]
-            return 'huggingface', repo_path
-    return 'direct', uri
-
-def calculate_sha256(file_path):
-    sha256_hash = hashlib.sha256()
-    with open(file_path, 'rb') as f:
-        for byte_block in iter(lambda: f.read(4096), b''):
-            sha256_hash.update(byte_block)
-    return sha256_hash.hexdigest()
-
-download_type, repo_id_or_url = parse_uri(uri)
-
-new_checksum = None
-
-# Decide download method based on URI type
-if download_type == 'huggingface':
-    # Use HF API to pull sha
-    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
-        try:
-            new_checksum = file.lfs.sha256
-            break
-        except Exception as e:
-            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-            sys.exit(2)
-    if new_checksum is None:
-        try:
-            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
-        except Exception as e:
-            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-            sys.exit(2)
-else:
-    response = requests.get(repo_id_or_url)
-    if response.status_code == 200:
-        with open(file_name, 'wb') as f:
-            f.write(response.content)
-        file_path = file_name
-    elif response.status_code == 404:
-        print(f'File not found: {response.status_code}', file=sys.stderr)
-        sys.exit(2)
-    else:
-        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
-        sys.exit(1)
-
-if new_checksum is None:
-    new_checksum = calculate_sha256(file_path)
-    print(new_checksum)
-    os.remove(file_path)
-else:
-    print(new_checksum)
+    new_checksum=$(python3 ./check_and_update.py $uri)
+    result=$?
-")
+    if [[ $result -eq 5 ]]; then
+        echo "Contaminated entry detected, deleting entry for $model_name..."
+        yq eval -i "del([$idx])" "$input_yaml"
+        return
+    fi
 
     if [[ "$new_checksum" == "" ]]; then
         echo "Error calculating checksum for $file_name. Skipping..."
@@ -94,7 +31,7 @@ else:
     echo "Checksum for $file_name: $new_checksum"
 
     # Compare and update the YAML file if checksums do not match
-    result=$?
+
     if [[ $result -eq 2 ]]; then
        echo "File not found, deleting entry for $file_name..."
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml" diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 3e4d8e4d8905..290f87938900 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -35,15 +35,16 @@ jobs: max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }} matrix: include: - - build-type: '' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-ffmpeg' - ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" - makeflags: "--jobs=3 --output-sync=target" + # This is basically covered by the AIO test + # - build-type: '' + # platforms: 'linux/amd64' + # tag-latest: 'false' + # tag-suffix: '-ffmpeg' + # ffmpeg: 'true' + # image-type: 'extras' + # runs-on: 'arc-runner-set' + # base-image: "ubuntu:22.04" + # makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "4" @@ -55,85 +56,85 @@ jobs: runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target" - - build-type: 'hipblas' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-hipblas' - ffmpeg: 'false' - image-type: 'extras' - base-image: "rocm/dev-ubuntu-22.04:6.1" - grpc-base-image: "ubuntu:22.04" - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'sycl_f16' - platforms: 'linux/amd64' - tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" - tag-suffix: 'sycl-f16-ffmpeg' - ffmpeg: 'true' - image-type: 'extras' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - core-image-build: - uses: ./.github/workflows/image_build.yml - with: - tag-latest: ${{ matrix.tag-latest }} - tag-suffix: ${{ matrix.tag-suffix }} - ffmpeg: ${{ matrix.ffmpeg }} - image-type: ${{ matrix.image-type }} - build-type: ${{ matrix.build-type }} - cuda-major-version: ${{ matrix.cuda-major-version }} - cuda-minor-version: ${{ matrix.cuda-minor-version }} - platforms: ${{ matrix.platforms }} - runs-on: ${{ matrix.runs-on }} - base-image: ${{ matrix.base-image }} - grpc-base-image: ${{ matrix.grpc-base-image }} - makeflags: ${{ matrix.makeflags }} - secrets: - dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} - dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} - quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} - quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} - strategy: - matrix: - include: - - build-type: '' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" - - build-type: 'sycl_f16' - platforms: 'linux/amd64' - tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" - tag-suffix: 'sycl-f16-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - runs-on: 'arc-runner-set' - makeflags: "--jobs=3 --output-sync=target" - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "4" - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-cublas-cuda12-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" - - build-type: 'vulkan' - platforms: 'linux/amd64' - tag-latest: 'false' - tag-suffix: '-vulkan-ffmpeg-core' - ffmpeg: 'true' - image-type: 'core' 
- runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" + # - build-type: 'hipblas' + # platforms: 'linux/amd64' + # tag-latest: 'false' + # tag-suffix: '-hipblas' + # ffmpeg: 'false' + # image-type: 'extras' + # base-image: "rocm/dev-ubuntu-22.04:6.1" + # grpc-base-image: "ubuntu:22.04" + # runs-on: 'arc-runner-set' + # makeflags: "--jobs=3 --output-sync=target" + # - build-type: 'sycl_f16' + # platforms: 'linux/amd64' + # tag-latest: 'false' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # grpc-base-image: "ubuntu:22.04" + # tag-suffix: 'sycl-f16-ffmpeg' + # ffmpeg: 'true' + # image-type: 'extras' + # runs-on: 'arc-runner-set' + # makeflags: "--jobs=3 --output-sync=target" + # core-image-build: + # uses: ./.github/workflows/image_build.yml + # with: + # tag-latest: ${{ matrix.tag-latest }} + # tag-suffix: ${{ matrix.tag-suffix }} + # ffmpeg: ${{ matrix.ffmpeg }} + # image-type: ${{ matrix.image-type }} + # build-type: ${{ matrix.build-type }} + # cuda-major-version: ${{ matrix.cuda-major-version }} + # cuda-minor-version: ${{ matrix.cuda-minor-version }} + # platforms: ${{ matrix.platforms }} + # runs-on: ${{ matrix.runs-on }} + # base-image: ${{ matrix.base-image }} + # grpc-base-image: ${{ matrix.grpc-base-image }} + # makeflags: ${{ matrix.makeflags }} + # secrets: + # dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} + # dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} + # quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} + # quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} + # strategy: + # matrix: + # include: + # - build-type: '' + # platforms: 'linux/amd64' + # tag-latest: 'false' + # tag-suffix: '-ffmpeg-core' + # ffmpeg: 'true' + # image-type: 'core' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # makeflags: "--jobs=4 --output-sync=target" + # - build-type: 'sycl_f16' + # platforms: 'linux/amd64' + # tag-latest: 'false' + # base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + # grpc-base-image: "ubuntu:22.04" + # tag-suffix: 'sycl-f16-ffmpeg-core' + # ffmpeg: 'true' + # image-type: 'core' + # runs-on: 'arc-runner-set' + # makeflags: "--jobs=3 --output-sync=target" + # - build-type: 'cublas' + # cuda-major-version: "12" + # cuda-minor-version: "4" + # platforms: 'linux/amd64' + # tag-latest: 'false' + # tag-suffix: '-cublas-cuda12-ffmpeg-core' + # ffmpeg: 'true' + # image-type: 'core' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # makeflags: "--jobs=4 --output-sync=target" + # - build-type: 'vulkan' + # platforms: 'linux/amd64' + # tag-latest: 'false' + # tag-suffix: '-vulkan-ffmpeg-core' + # ffmpeg: 'true' + # image-type: 'core' + # runs-on: 'ubuntu-latest' + # base-image: "ubuntu:22.04" + # makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 6a7682d47f62..8c411450b28d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,8 +1,10 @@ name: Build and Release on: -- push -- pull_request + push: + branches: + - master + pull_request: env: GRPC_VERSION: v1.64.0 @@ -32,7 +34,7 @@ jobs: run: | sudo apt-get update sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk - sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu + sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev - name: Install CUDA Dependencies run: | curl -O 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb @@ -52,7 +54,8 @@ jobs: run: | git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ + cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \ + cd cmake/build && cmake -DgRPC_INSTALL=ON \ -DgRPC_BUILD_TESTS=OFF \ ../.. && sudo make --jobs 5 --output-sync=target - name: Install gRPC @@ -77,7 +80,6 @@ jobs: echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \ echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN GRPC_DIR=$PWD/grpc - cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \ GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \ mkdir -p $GRPC_CROSS_BUILD_DIR && \ @@ -147,7 +149,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake + sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev - name: Intel Dependencies run: | wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null @@ -197,7 +199,8 @@ jobs: if: steps.cache-grpc.outputs.cache-hit != 'true' run: | git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ + cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \ + cd cmake/build && cmake -DgRPC_INSTALL=ON \ -DgRPC_BUILD_TESTS=OFF \ ../.. && sudo make --jobs 5 --output-sync=target - name: Install gRPC diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0d102563470e..082e27cba556 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -71,6 +71,7 @@ jobs: run: | sudo apt-get update sudo apt-get install build-essential curl ffmpeg + sudo apt-get install -y libgmock-dev curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ @@ -120,7 +121,8 @@ jobs: if: steps.cache-grpc.outputs.cache-hit != 'true' run: | git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \ - cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \ + cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \ + cmake -DgRPC_INSTALL=ON \ -DgRPC_BUILD_TESTS=OFF \ ../.. 
&& sudo make --jobs 5 - name: Install gRPC diff --git a/Dockerfile b/Dockerfile index 907ad54b442d..e1c06a4aed11 100644 --- a/Dockerfile +++ b/Dockerfile @@ -227,6 +227,7 @@ RUN apt-get update && \ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \ mkdir -p /build/grpc/cmake/build && \ cd /build/grpc/cmake/build && \ + sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \ cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \ make && \ make install && \ diff --git a/Makefile b/Makefile index 0cb3b73dec1f..cdc7b53d3226 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=c4dd11d1d3903e1922c06242e189f6310fc4d8c3 +CPPLLAMA_VERSION?=dd07a123b79f9bd9e8a4ba0447427b3083e9347a # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all @@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp -WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a +WHISPER_CPP_VERSION?=d207c6882247984689091ae9d780d2e51eab1df7 # bert.cpp version BERT_REPO?=https://github.com/go-skynet/go-bert.cpp @@ -747,7 +747,6 @@ backend/cpp/llama/llama.cpp: INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \ - -DABSL_BUILD_TESTING=OFF \ -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \ -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \ -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \ diff --git a/core/application.go b/core/application.go index 78a7af9e184b..e4efbdd0ab93 100644 --- a/core/application.go +++ b/core/application.go @@ -28,7 +28,6 @@ type Application struct { // LocalAI System Services BackendMonitorService *services.BackendMonitorService GalleryService *services.GalleryService - ListModelsService *services.ListModelsService LocalAIMetricsService *services.LocalAIMetricsService // OpenAIService *services.OpenAIService } diff --git a/core/backend/llm.go b/core/backend/llm.go index a6f7fe5671d4..9268fbbc4f8f 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -57,7 +57,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im if _, err := os.Stat(modelFile); os.IsNotExist(err) { utils.ResetDownloadTimers() // if we failed to load the model, we try to download it - err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction) + err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans) if err != nil { return nil, err } diff --git a/core/cli/models.go b/core/cli/models.go index d62ad3185e4e..030470185e74 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -2,6 +2,7 @@ package cli import ( "encoding/json" + "errors" "fmt" cliContext "github.com/mudler/LocalAI/core/cli/context" @@ -24,7 +25,8 @@ type ModelsList struct { } type ModelsInstall struct { - ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` + DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security 
scanner before downloading any files." group:"hardening" default:"false"` + ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` ModelsCMDFlags `embed:""` } @@ -88,9 +90,15 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { return err } + err = gallery.SafetyScanGalleryModel(model) + if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + return err + } + log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model") } - err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName) + + err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) if err != nil { return err } diff --git a/core/cli/run.go b/core/cli/run.go index 4a3133918df8..d7b45f77ce09 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -42,26 +42,27 @@ type RunCMD struct { Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` - Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` - CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` - CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` - LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"` - CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` - UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` - APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` - DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` - OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." 
group:"api"` - Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` - Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` - ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` - SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` - PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` - ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` - EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"` - WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` - EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` - WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` - Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` + CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` + LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"` + CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` + UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` + APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` + DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` + DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"` + OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." 
group:"hardening"` + Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` + Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` + SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` + PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` + ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` + EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"` + WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` + EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` + WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` + Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` } func (r *RunCMD) Run(ctx *cliContext.Context) error { @@ -92,6 +93,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithApiKeys(r.APIKeys), config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithOpaqueErrors(r.OpaqueErrors), + config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan), } token := "" diff --git a/core/cli/util.go b/core/cli/util.go index e8ccb942e125..a7204092bed2 100644 --- a/core/cli/util.go +++ b/core/cli/util.go @@ -1,16 +1,22 @@ package cli import ( + "encoding/json" + "errors" "fmt" "github.com/rs/zerolog/log" cliContext "github.com/mudler/LocalAI/core/cli/context" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/pkg/downloader" gguf "github.com/thxcode/gguf-parser-go" ) type UtilCMD struct { GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"` + HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. 
WARNING: this is a best-effort feature and may not catch everything!"` } type GGUFInfoCMD struct { @@ -18,6 +24,12 @@ type GGUFInfoCMD struct { Header bool `optional:"" default:"false" name:"header" help:"Show header information"` } +type HFScanCMD struct { + ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` + Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` + ToScan []string `arg:""` +} + func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error { if u.Args == nil || len(u.Args) == 0 { return fmt.Errorf("no GGUF file provided") @@ -53,3 +65,37 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error { return nil } + +func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error { + log.Info().Msg("LocalAI Security Scanner - This is BEST EFFORT functionality! Currently limited to huggingface models!") + if len(hfscmd.ToScan) == 0 { + log.Info().Msg("Checking all installed models against galleries") + var galleries []config.Gallery + if err := json.Unmarshal([]byte(hfscmd.Galleries), &galleries); err != nil { + log.Error().Err(err).Msg("unable to load galleries") + } + + err := gallery.SafetyScanGalleryModels(galleries, hfscmd.ModelsPath) + if err == nil { + log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.") + } else { + log.Error().Err(err).Msg("! WARNING ! A known-vulnerable model is installed!") + } + return err + } else { + var errs error = nil + for _, uri := range hfscmd.ToScan { + log.Info().Str("uri", uri).Msg("scanning specific uri") + scanResults, err := downloader.HuggingFaceScan(uri) + if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!") + errs = errors.Join(errs, err) + } + } + if errs != nil { + return errs + } + log.Info().Msg("No security warnings were detected for your installed models. 
Please note that this is a BEST EFFORT tool, and all issues may not be detected.") + return nil + } +} diff --git a/core/config/application_config.go b/core/config/application_config.go index 1bac349b337b..7233d1ac0916 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -31,6 +31,7 @@ type ApplicationConfig struct { PreloadModelsFromPath string CORSAllowOrigins string ApiKeys []string + EnforcePredownloadScans bool OpaqueErrors bool P2PToken string @@ -301,6 +302,12 @@ func WithApiKeys(apiKeys []string) AppOption { } } +func WithEnforcedPredownloadScans(enforced bool) AppOption { + return func(o *ApplicationConfig) { + o.EnforcePredownloadScans = enforced + } +} + func WithOpaqueErrors(opaque bool) AppOption { return func(o *ApplicationConfig) { o.OpaqueErrors = opaque diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index be1677559765..231dce6d491c 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -15,7 +15,7 @@ import ( ) // Installs a model from the gallery -func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error { +func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error { applyModel := func(model *GalleryModel) error { name = strings.ReplaceAll(name, string(os.PathSeparator), "__") @@ -63,7 +63,7 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s return err } - if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus); err != nil { + if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil { return err } @@ -228,3 +228,29 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin return err } + +// This is ***NEVER*** going to be perfect or finished. +// This is a BEST EFFORT function to surface known-vulnerable models to users. 
+func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error { + galleryModels, err := AvailableGalleryModels(galleries, basePath) + if err != nil { + return err + } + for _, gM := range galleryModels { + if gM.Installed { + err = errors.Join(err, SafetyScanGalleryModel(gM)) + } + } + return err +} + +func SafetyScanGalleryModel(galleryModel *GalleryModel) error { + for _, file := range galleryModel.AdditionalFiles { + scanResults, err := downloader.HuggingFaceScan(file.URI) + if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") + return err + } + } + return nil +} diff --git a/core/gallery/models.go b/core/gallery/models.go index 8d020ff5d99c..28a2e3f2a18a 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -1,6 +1,7 @@ package gallery import ( + "errors" "fmt" "os" "path/filepath" @@ -94,7 +95,7 @@ func ReadConfigFile(filePath string) (*Config, error) { return &config, nil } -func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error { +func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error { // Create base path if it doesn't exist err := os.MkdirAll(basePath, 0750) if err != nil { @@ -112,9 +113,18 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides if err := utils.VerifyPath(file.Filename, basePath); err != nil { return err } + // Create file path filePath := filepath.Join(basePath, file.Filename) + if enforceScan { + scanResults, err := downloader.HuggingFaceScan(file.URI) + if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") + return err + } + } + if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { return err } diff --git a/core/gallery/models_test.go b/core/gallery/models_test.go index 17a309115ff5..5217253fdcdd 100644 --- a/core/gallery/models_test.go +++ b/core/gallery/models_test.go @@ -21,7 +21,7 @@ var _ = Describe("Model test", func() { defer os.RemoveAll(tempdir) c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) - err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}) + err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true) Expect(err).ToNot(HaveOccurred()) for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} { @@ -69,7 +69,7 @@ var _ = Describe("Model test", func() { Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml")) Expect(models[0].Installed).To(BeFalse()) - err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}) + err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true) Expect(err).ToNot(HaveOccurred()) dat, err := 
os.ReadFile(filepath.Join(tempdir, "bert.yaml")) @@ -106,7 +106,7 @@ var _ = Describe("Model test", func() { c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) - err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}) + err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true) Expect(err).ToNot(HaveOccurred()) for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} { @@ -122,7 +122,7 @@ var _ = Describe("Model test", func() { c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) - err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}) + err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true) Expect(err).ToNot(HaveOccurred()) for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} { @@ -148,7 +148,7 @@ var _ = Describe("Model test", func() { c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml")) Expect(err).ToNot(HaveOccurred()) - err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}) + err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true) Expect(err).To(HaveOccurred()) }) }) diff --git a/core/http/ctx/fiber.go b/core/http/ctx/fiber.go index d298b2903aa5..9405984711ae 100644 --- a/core/http/ctx/fiber.go +++ b/core/http/ctx/fiber.go @@ -5,6 +5,8 @@ import ( "strings" "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" "github.com/rs/zerolog/log" ) @@ -13,7 +15,7 @@ import ( // If no model is specified, it will take the first available // Takes a model string as input which should be the one received from the user request. // It returns the model name resolved from the context and an error if any. 
-func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { +func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) { if ctx.Params("model") != "" { modelInput = ctx.Params("model") } @@ -24,7 +26,7 @@ func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput stri // If no model was specified, take the first available if modelInput == "" && !bearerExists && firstModel { - models, _ := loader.ListModels() + models, _ := services.ListModels(cl, loader, "", true) if len(models) > 0 { modelInput = models[0] log.Debug().Msgf("No model specified, using: %s", modelInput) diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 12da7b9b0113..bb6901be8878 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -28,7 +28,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false) + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false) if err != nil { modelFile = input.ModelID log.Warn().Msgf("Model not found in context: %s", input.ModelID) diff --git a/core/http/endpoints/jina/rerank.go b/core/http/endpoints/jina/rerank.go index 383dcc5e63d1..ddeee745c28e 100644 --- a/core/http/endpoints/jina/rerank.go +++ b/core/http/endpoints/jina/rerank.go @@ -28,7 +28,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false) if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 3ae2eea55622..ca3f58bd9e28 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -29,7 +29,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi return err } - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false) + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false) if err != nil { modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 34a2d975c10d..5d217173c3e5 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -5,6 +5,7 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/model" ) @@ -12,7 +13,7 @@ import ( func WelcomeEndpoint(appConfig *config.ApplicationConfig, cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { - models, _ := ml.ListModels() + models, _ := services.ListModels(cl, ml, "", true) backendConfigs := cl.GetAllBackendConfigs() galleryConfigs := map[string]*gallery.Config{} @@ -28,10 +29,18 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, // Get model statuses to display in the UI the 
operation in progress processingModels, taskTypes := modelStatus() + modelsWithoutConfig := []string{} + + for _, m := range models { + if _, ok := galleryConfigs[m]; !ok { + modelsWithoutConfig = append(modelsWithoutConfig, m) + } + } + summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), - "Models": models, + "Models": modelsWithoutConfig, "ModelsConfig": backendConfigs, "GalleryConfig": galleryConfigs, "IsP2PEnabled": p2p.IsP2PEnabled(), diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go index 4882eeaf4af2..ba2ebcdee50a 100644 --- a/core/http/endpoints/openai/assistant.go +++ b/core/http/endpoints/openai/assistant.go @@ -11,6 +11,7 @@ import ( "github.com/gofiber/fiber/v2" "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services" model "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/utils" "github.com/rs/zerolog/log" @@ -79,7 +80,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) } - if !modelExists(ml, request.Model) { + if !modelExists(cl, ml, request.Model) { log.Warn().Msgf("Model: %s was not found in list of models.", request.Model) return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found") } @@ -213,9 +214,9 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant { return filteredAssistants } -func modelExists(ml *model.ModelLoader, modelName string) (found bool) { +func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) { found = false - models, err := ml.ListModels() + models, err := services.ListModels(cl, ml, "", true) if err != nil { return } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 1317ee07c6fe..763e3f694e99 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -159,7 +159,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, startupOptions, true) + modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 5eedfaf36d50..b087cc5f8d35 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -57,7 +57,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, appConfig, true) + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go index a5af12c2f174..bb43ac3b4b9c 100644 --- a/core/http/endpoints/openai/edit.go +++ b/core/http/endpoints/openai/edit.go @@ -18,7 +18,7 @@ import ( func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - modelFile, input, err := readRequest(c, ml, appConfig, true) + modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if 
err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go index de7ea1c6f5cb..e247d84e332b 100644 --- a/core/http/endpoints/openai/embeddings.go +++ b/core/http/endpoints/openai/embeddings.go @@ -23,7 +23,7 @@ import ( // @Router /v1/embeddings [post] func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - model, input, err := readRequest(c, ml, appConfig, true) + model, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 27c11f535944..6c76ba843275 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -66,7 +66,7 @@ func downloadFile(url string) (string, error) { // @Router /v1/images/generations [post] func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, ml, appConfig, false) + m, input, err := readRequest(c, cl, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go index ba6bd1d72d4c..d446b10008d8 100644 --- a/core/http/endpoints/openai/list.go +++ b/core/http/endpoints/openai/list.go @@ -2,15 +2,17 @@ package openai import ( "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" + model "github.com/mudler/LocalAI/pkg/model" ) // ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models // @Summary List and describe the various models available in the API. // @Success 200 {object} schema.ModelsDataResponse "Response" // @Router /v1/models [get] -func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error { +func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { // If blank, no filter is applied. filter := c.Query("filter") @@ -18,7 +20,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er // By default, exclude any loose files that are already referenced by a configuration file. 
excludeConfigured := c.QueryBool("excludeConfigured", true) - dataModels, err := lms.ListModels(filter, excludeConfigured) + dataModels, err := modelList(bcl, ml, filter, excludeConfigured) if err != nil { return err } @@ -28,3 +30,20 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er }) } } + +func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { + + models, err := services.ListModels(bcl, ml, filter, excludeConfigured) + if err != nil { + return nil, err + } + + dataModels := []schema.OpenAIModel{} + + // Then iterate through the loose files: + for _, m := range models { + dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + } + + return dataModels, nil +} diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index 009de4a01460..a99ebea2ec41 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -15,7 +15,7 @@ import ( "github.com/rs/zerolog/log" ) -func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { +func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) // Get input data from the request body @@ -31,7 +31,7 @@ func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfi log.Debug().Msgf("Request received: %s", string(received)) - modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel) + modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel) return modelFile, input, err } diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go index c8e447f79cb0..4e23f8046c6a 100644 --- a/core/http/endpoints/openai/transcription.go +++ b/core/http/endpoints/openai/transcription.go @@ -25,7 +25,7 @@ import ( // @Router /v1/audio/transcriptions [post] func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - m, input, err := readRequest(c, ml, appConfig, false) + m, input, err := readRequest(c, cl, ml, appConfig, false) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) } diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index cb454f332cd7..e190bc6d352f 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -5,7 +5,6 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/openai" - "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" ) @@ -81,8 +80,7 @@ func RegisterOpenAIRoutes(app *fiber.App, app.Static("/generated-audio", appConfig.AudioDir) } - // models - tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance. 
- app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS)) - app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS)) + // List models + app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml)) + app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml)) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 51742b819ce2..33706944fa2e 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -27,7 +27,6 @@ func RegisterUIRoutes(app *fiber.App, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, auth func(*fiber.Ctx) error) { - tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance. // keeps the state of models that are being installed from the UI var processingModels = xsync.NewSyncedMap[string, string]() @@ -270,7 +269,7 @@ func RegisterUIRoutes(app *fiber.App, // Show the Chat page app.Get("/chat/:model", auth, func(c *fiber.Ctx) error { - backendConfigs, _ := tmpLMS.ListModels("", true) + backendConfigs, _ := services.ListModels(cl, ml, "", true) summary := fiber.Map{ "Title": "LocalAI - Chat with " + c.Params("model"), @@ -285,7 +284,7 @@ func RegisterUIRoutes(app *fiber.App, }) app.Get("/talk/", auth, func(c *fiber.Ctx) error { - backendConfigs, _ := tmpLMS.ListModels("", true) + backendConfigs, _ := services.ListModels(cl, ml, "", true) if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models @@ -295,7 +294,7 @@ func RegisterUIRoutes(app *fiber.App, summary := fiber.Map{ "Title": "LocalAI - Talk", "ModelsConfig": backendConfigs, - "Model": backendConfigs[0].ID, + "Model": backendConfigs[0], "IsP2PEnabled": p2p.IsP2PEnabled(), "Version": internal.PrintableVersion(), } @@ -306,7 +305,7 @@ func RegisterUIRoutes(app *fiber.App, app.Get("/chat/", auth, func(c *fiber.Ctx) error { - backendConfigs, _ := tmpLMS.ListModels("", true) + backendConfigs, _ := services.ListModels(cl, ml, "", true) if len(backendConfigs) == 0 { // If no model is available redirect to the index which suggests how to install models @@ -314,9 +313,9 @@ func RegisterUIRoutes(app *fiber.App, } summary := fiber.Map{ - "Title": "LocalAI - Chat with " + backendConfigs[0].ID, + "Title": "LocalAI - Chat with " + backendConfigs[0], "ModelsConfig": backendConfigs, - "Model": backendConfigs[0].ID, + "Model": backendConfigs[0], "Version": internal.PrintableVersion(), "IsP2PEnabled": p2p.IsP2PEnabled(), } diff --git a/core/http/views/chat.html b/core/http/views/chat.html index 79c395708e45..67d40bfd5817 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -100,10 +100,10 @@

Chat wit {{ $model:=.Model}} {{ range .ModelsConfig }} - {{ if eq .ID $model }} - + {{ if eq . $model }} + {{ else }} - + {{ end }} {{ end }} diff --git a/core/http/views/index.html b/core/http/views/index.html index e2cbfe03ea4c..26cc056ae0b9 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -17,15 +17,26 @@

Welcome to your LocalAI instance
- {{template "views/partials/inprogress" .}} - {{ if eq (len .ModelsConfig) 0 }} -

Ouch! seems you don't have any models installed!

+

Ouch! seems you don't have any models installed from the LocalAI gallery!

..install something from the 🖼️ Gallery or check the Getting started documentation

+ + {{ if ne (len .Models) 0 }} +
+

+ However, It seems you have installed some models installed without a configuration file: +

+ {{ range .Models }} +
+

{{.}}

+
+ {{end}} + {{end}} {{ else }} -

Installed models

-

We have {{len .ModelsConfig}} pre-loaded models available.

+ {{ $modelsN := len .ModelsConfig}} + {{ $modelsN = add $modelsN (len .Models)}} +

{{$modelsN}} Installed model(s)

@@ -76,12 +87,29 @@

Installed modelsDelete {{ end }} + {{ range .Models }} +

+ + + + + + {{end}}
+ + +

{{.}}

+
+ + auto + + + + No Configuration + +
{{ end }} - - -
diff --git a/core/http/views/talk.html b/core/http/views/talk.html index afb494e953a5..dc25d12517aa 100644 --- a/core/http/views/talk.html +++ b/core/http/views/talk.html @@ -62,7 +62,7 @@ {{ range .ModelsConfig }} - + {{ end }} @@ -76,7 +76,7 @@ {{ range .ModelsConfig }} - + {{ end }} @@ -89,7 +89,7 @@ > {{ range .ModelsConfig }} - + {{ end }} diff --git a/core/services/gallery.go b/core/services/gallery.go index 2c0ed4356dc8..45bebd4f5087 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -30,7 +30,7 @@ func NewGalleryService(appConfig *config.ApplicationConfig) *GalleryService { } } -func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64)) error { +func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error { config, err := gallery.GetGalleryConfigFromURL(req.URL, modelPath) if err != nil { @@ -39,7 +39,7 @@ func prepareModel(modelPath string, req gallery.GalleryModel, downloadStatus fun config.Files = append(config.Files, req.AdditionalFiles...) - return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus) + return gallery.InstallModel(modelPath, req.Name, &config, req.Overrides, downloadStatus, enforceScan) } func (g *GalleryService) UpdateStatus(s string, op *gallery.GalleryOpStatus) { @@ -127,16 +127,16 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader } else { // if the request contains a gallery name, we apply the gallery from the gallery list if op.GalleryModelName != "" { - err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback) + err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans) } else if op.ConfigURL != "" { - err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, progressCallback, op.ConfigURL) + err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL) if err != nil { updateError(err) continue } err = cl.Preload(g.appConfig.ModelPath) } else { - err = prepareModel(g.appConfig.ModelPath, op.Req, progressCallback) + err = prepareModel(g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans) } } @@ -175,22 +175,22 @@ type galleryModel struct { ID string `json:"id"` } -func processRequests(modelPath string, galleries []config.Gallery, requests []galleryModel) error { +func processRequests(modelPath string, enforceScan bool, galleries []config.Gallery, requests []galleryModel) error { var err error for _, r := range requests { utils.ResetDownloadTimers() if r.ID == "" { - err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction) + err = prepareModel(modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan) } else { err = gallery.InstallModelFromGallery( - galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction) + galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction, enforceScan) } } return err } -func ApplyGalleryFromFile(modelPath, s string, galleries []config.Gallery) error { +func ApplyGalleryFromFile(modelPath, s string, enforceScan bool, galleries []config.Gallery) error { dat, err := os.ReadFile(s) if err != nil { return err @@ -201,15 +201,15 @@ func 
ApplyGalleryFromFile(modelPath, s string, galleries []config.Gallery) error return err } - return processRequests(modelPath, galleries, requests) + return processRequests(modelPath, enforceScan, galleries, requests) } -func ApplyGalleryFromString(modelPath, s string, galleries []config.Gallery) error { +func ApplyGalleryFromString(modelPath, s string, enforceScan bool, galleries []config.Gallery) error { var requests []galleryModel err := json.Unmarshal([]byte(s), &requests) if err != nil { return err } - return processRequests(modelPath, galleries, requests) + return processRequests(modelPath, enforceScan, galleries, requests) } diff --git a/core/services/list_models.go b/core/services/list_models.go index 825032528f37..4b578e2579fa 100644 --- a/core/services/list_models.go +++ b/core/services/list_models.go @@ -4,34 +4,19 @@ import ( "regexp" "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/model" ) -type ListModelsService struct { - bcl *config.BackendConfigLoader - ml *model.ModelLoader - appConfig *config.ApplicationConfig -} - -func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService { - return &ListModelsService{ - bcl: bcl, - ml: ml, - appConfig: appConfig, - } -} - -func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) { +func ListModels(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]string, error) { - models, err := lms.ml.ListModels() + models, err := ml.ListFilesInModelPath() if err != nil { return nil, err } var mm map[string]interface{} = map[string]interface{}{} - dataModels := []schema.OpenAIModel{} + dataModels := []string{} var filterFn func(name string) bool @@ -50,13 +35,13 @@ func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) } // Start with the known configurations - for _, c := range lms.bcl.GetAllBackendConfigs() { + for _, c := range bcl.GetAllBackendConfigs() { if excludeConfigured { mm[c.Model] = nil } if filterFn(c.Name) { - dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"}) + dataModels = append(dataModels, c.Name) } } @@ -64,7 +49,7 @@ func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) for _, m := range models { // And only adds them if they shouldn't be skipped. 
if _, exists := mm[m]; !exists && filterFn(m) { - dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"}) + dataModels = append(dataModels, m) } } diff --git a/core/startup/startup.go b/core/startup/startup.go index 278c8e1cd08a..55f930a48875 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -60,7 +60,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode } } - if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, nil, options.ModelsURL...); err != nil { + if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { log.Error().Err(err).Msg("error installing models") } @@ -84,13 +84,13 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode } if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.Galleries); err != nil { + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { return nil, nil, nil, err } } if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.Galleries); err != nil { + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { return nil, nil, nil, err } } @@ -195,7 +195,6 @@ func createApplication(appConfig *config.ApplicationConfig) *core.Application { app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) app.GalleryService = services.NewGalleryService(app.ApplicationConfig) - app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() diff --git a/gallery/index.yaml b/gallery/index.yaml index 444fea53b5cc..056381873cd6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1730,6 +1730,23 @@ - filename: Llama-3-Yggdrasil-2.0-8B-Q4_K_M.gguf sha256: 75091cf3a7145373922dbeb312c689cace89ba06215ce74b6fc7055a4b35a40c uri: huggingface://bartowski/Llama-3-Yggdrasil-2.0-8B-GGUF/Llama-3-Yggdrasil-2.0-8B-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "hathor_tahsin-l3-8b-v0.85" + description: | + Hathor_Tahsin [v-0.85] is designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. + Note: Hathor_Tahsin [v0.85] is trained on 3 epochs of Private RP, STEM (Intruction/Dialogs), Opus instructons, mixture light/classical novel data, roleplaying chat pairs over llama 3 8B instruct. 
+ Additional Note's: (Based on Hathor_Fractionate-v0.5 instead of Hathor_Aleph-v0.72, should be less repetitive than either 0.72 or 0.8) + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/MY9tjLnEG5hOQOyKk06PK.jpeg + urls: + - https://huggingface.co/Nitral-AI/Hathor_Tahsin-L3-8B-v0.85 + - https://huggingface.co/bartowski/Hathor_Tahsin-L3-8B-v0.85-GGUF + overrides: + parameters: + model: Hathor_Tahsin-L3-8B-v0.85-Q4_K_M.gguf + files: + - filename: Hathor_Tahsin-L3-8B-v0.85-Q4_K_M.gguf + sha256: c82f39489e767a842925fc58cafb5dec0cc71313d904a53fdb46186be899ecb0 + uri: huggingface://bartowski/Hathor_Tahsin-L3-8B-v0.85-GGUF/Hathor_Tahsin-L3-8B-v0.85-Q4_K_M.gguf - name: "llama-3-sec-chat" url: "github:mudler/LocalAI/gallery/chatml.yaml@master" urls: diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index de575d63fc97..1f88bbb14c43 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -3,6 +3,8 @@ package downloader import ( "crypto/sha256" "encoding/base64" + "encoding/json" + "errors" "fmt" "io" "net/http" @@ -129,6 +131,7 @@ func ConvertURL(s string) string { // e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf owner := strings.Split(repository, "/")[0] repo := strings.Split(repository, "/")[1] + branch := "main" if strings.Contains(repo, "@") { branch = strings.Split(repository, "@")[1] @@ -353,3 +356,42 @@ func calculateSHA(filePath string) (string, error) { return fmt.Sprintf("%x", hash.Sum(nil)), nil } + +type HuggingFaceScanResult struct { + RepositoryId string `json:"repositoryId"` + Revision string `json:"revision"` + HasUnsafeFiles bool `json:"hasUnsafeFile"` + ClamAVInfectedFiles []string `json:"clamAVInfectedFiles"` + DangerousPickles []string `json:"dangerousPickles"` + ScansDone bool `json:"scansDone"` +} + +var ErrNonHuggingFaceFile = errors.New("not a huggingface repo") +var ErrUnsafeFilesFound = errors.New("unsafe files found") + +func HuggingFaceScan(uri string) (*HuggingFaceScanResult, error) { + cleanParts := strings.Split(ConvertURL(uri), "/") + if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" { + return nil, ErrNonHuggingFaceFile + } + results, err := http.Get(fmt.Sprintf("https://huggingface.co/api/models/%s/%s/scan", cleanParts[3], cleanParts[4])) + if err != nil { + return nil, err + } + if results.StatusCode != 200 { + return nil, fmt.Errorf("unexpected status code during HuggingFaceScan: %d", results.StatusCode) + } + scanResult := &HuggingFaceScanResult{} + bodyBytes, err := io.ReadAll(results.Body) + if err != nil { + return nil, err + } + err = json.Unmarshal(bodyBytes, scanResult) + if err != nil { + return nil, err + } + if scanResult.HasUnsafeFiles { + return scanResult, ErrUnsafeFilesFound + } + return scanResult, nil +} diff --git a/pkg/model/loader.go b/pkg/model/loader.go index faaacdd4c455..c7159f7eca6d 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -30,7 +30,6 @@ type PromptTemplateData struct { MessageIndex int } -// TODO: Ask mudler about FunctionCall stuff being useful at the message level? 
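For context, here is a minimal sketch of how a caller might gate a download on the new downloader.HuggingFaceScan helper when pre-download scans are enforced. The checkBeforeDownload helper, the package main wrapper, and the example URI are illustrative assumptions only; the actual wiring happens inside gallery.InstallModel via the enforceScan flag threaded through the changes above and is not reproduced here.

package main

import (
	"errors"
	"fmt"

	"github.com/mudler/LocalAI/pkg/downloader"
)

// checkBeforeDownload is a hypothetical helper showing how the scan API
// could be consulted before fetching a model file.
func checkBeforeDownload(uri string, enforceScan bool) error {
	if !enforceScan {
		return nil
	}
	result, err := downloader.HuggingFaceScan(uri)
	if errors.Is(err, downloader.ErrNonHuggingFaceFile) {
		// Not hosted on huggingface.co: there is nothing to scan.
		return nil
	}
	if errors.Is(err, downloader.ErrUnsafeFilesFound) {
		// The Hub flagged the repository: refuse to download and report what was found.
		return fmt.Errorf("refusing %s: infected files %v, dangerous pickles %v",
			uri, result.ClamAVInfectedFiles, result.DangerousPickles)
	}
	return err
}

func main() {
	// Example URI; any huggingface:// or huggingface.co URL works here.
	uri := "huggingface://TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf"
	if err := checkBeforeDownload(uri, true); err != nil {
		fmt.Println(err)
	}
}

Note that HuggingFaceScan returns the scan result alongside ErrUnsafeFilesFound, so callers can report exactly which files the Hub flagged instead of failing opaquely.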
type ChatMessageTemplateData struct { SystemPrompt string Role string @@ -87,22 +86,49 @@ func (ml *ModelLoader) ExistsInModelPath(s string) bool { return utils.ExistsInPath(ml.ModelPath, s) } -func (ml *ModelLoader) ListModels() ([]string, error) { +var knownFilesToSkip []string = []string{ + "MODEL_CARD", + "README", + "README.md", +} + +var knownModelsNameSuffixToSkip []string = []string{ + ".tmpl", + ".keep", + ".yaml", + ".yml", + ".json", + ".DS_Store", + ".", + ".partial", + ".tar.gz", +} + +func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) { files, err := os.ReadDir(ml.ModelPath) if err != nil { return []string{}, err } models := []string{} +FILE: for _, file := range files { - // Skip templates, YAML, .keep, .json, and .DS_Store files - TODO: as this list grows, is there a more efficient method? - if strings.HasSuffix(file.Name(), ".tmpl") || - strings.HasSuffix(file.Name(), ".keep") || - strings.HasSuffix(file.Name(), ".yaml") || - strings.HasSuffix(file.Name(), ".yml") || - strings.HasSuffix(file.Name(), ".json") || - strings.HasSuffix(file.Name(), ".DS_Store") || - strings.HasPrefix(file.Name(), ".") { + + for _, skip := range knownFilesToSkip { + if strings.EqualFold(file.Name(), skip) { + continue FILE + } + } + + // Skip templates, YAML, .keep, .json, and .DS_Store files + for _, skip := range knownModelsNameSuffixToSkip { + if strings.HasSuffix(file.Name(), skip) { + continue FILE + } + } + + // Skip directories + if file.IsDir() { continue } diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index d678f283b05a..9fa890b0f9f6 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -3,6 +3,7 @@ package startup import ( "errors" "fmt" + "net/url" "os" "path/filepath" "strings" @@ -18,7 +19,7 @@ import ( // InstallModels will preload models from the given list of URLs and galleries // It will download the model if it is not already present in the model path // It will also try to resolve if the model is an embedded model YAML configuration -func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, downloadStatus func(string, string, string, float64), models ...string) error { +func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error { // create an error that groups all errors var err error @@ -77,19 +78,35 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath log.Info().Msgf("[startup] installed model from OCI repository: %s", ociName) case downloader.LooksLikeURL(url): - log.Debug().Msgf("[startup] resolved model to download: %s", url) + log.Debug().Msgf("[startup] downloading %s", url) + + // Extract filename from URL + fileName, e := filenameFromUrl(url) + if e != nil || fileName == "" { + fileName = utils.MD5(url) + if strings.HasSuffix(url, ".yaml") || strings.HasSuffix(url, ".yml") { + fileName = fileName + ".yaml" + } + log.Warn().Err(e).Str("url", url).Msg("error extracting filename from URL") + //err = errors.Join(err, e) + //continue + } - // md5 of model name - md5Name := utils.MD5(url) + modelPath := filepath.Join(modelPath, fileName) + + if e := utils.VerifyPath(fileName, modelPath); e != nil { + log.Error().Err(e).Str("filepath", modelPath).Msg("error verifying path") + err = errors.Join(err, e) + continue + } // check if file exists - if _, e := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(e, os.ErrNotExist) { 
- modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - e := downloader.DownloadFile(url, modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) { + if _, e := os.Stat(modelPath); errors.Is(e, os.ErrNotExist) { + e := downloader.DownloadFile(url, modelPath, "", 0, 0, func(fileName, current, total string, percent float64) { utils.DisplayDownloadFunction(fileName, current, total, percent) }) if e != nil { - log.Error().Err(e).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model") + log.Error().Err(e).Str("url", url).Str("filepath", modelPath).Msg("error downloading model") err = errors.Join(err, e) } } @@ -113,7 +130,7 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath } } else { // Check if it's a model gallery, or print a warning - e, found := installModel(galleries, url, modelPath, downloadStatus) + e, found := installModel(galleries, url, modelPath, downloadStatus, enforceScan) if e != nil && found { log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url) err = errors.Join(err, e) @@ -127,7 +144,7 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath return err } -func installModel(galleries []config.Gallery, modelName, modelPath string, downloadStatus func(string, string, string, float64)) (error, bool) { +func installModel(galleries []config.Gallery, modelName, modelPath string, downloadStatus func(string, string, string, float64), enforceScan bool) (error, bool) { models, err := gallery.AvailableGalleryModels(galleries, modelPath) if err != nil { return err, false @@ -143,10 +160,27 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl } log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model") - err = gallery.InstallModelFromGallery(galleries, modelName, modelPath, gallery.GalleryModel{}, downloadStatus) + err = gallery.InstallModelFromGallery(galleries, modelName, modelPath, gallery.GalleryModel{}, downloadStatus, enforceScan) if err != nil { return err, true } return nil, true } + +func filenameFromUrl(urlstr string) (string, error) { + // strip anything after @ + if strings.Contains(urlstr, "@") { + urlstr = strings.Split(urlstr, "@")[0] + } + + u, err := url.Parse(urlstr) + if err != nil { + return "", fmt.Errorf("error due to parsing url: %w", err) + } + x, err := url.QueryUnescape(u.EscapedPath()) + if err != nil { + return "", fmt.Errorf("error due to escaping: %w", err) + } + return filepath.Base(x), nil +} diff --git a/pkg/startup/model_preload_test.go b/pkg/startup/model_preload_test.go index e3d7d9793a10..869fcd3e33c1 100644 --- a/pkg/startup/model_preload_test.go +++ b/pkg/startup/model_preload_test.go @@ -20,9 +20,9 @@ var _ = Describe("Preload test", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" - fileName := fmt.Sprintf("%s.yaml", "1701d57f28d47552516c2b6ecc3cc719") + fileName := fmt.Sprintf("%s.yaml", "phi-2") - InstallModels([]config.Gallery{}, libraryURL, tmpdir, nil, "phi-2") + InstallModels([]config.Gallery{}, libraryURL, tmpdir, true, nil, "phi-2") resultFile := filepath.Join(tmpdir, fileName) @@ -36,9 +36,9 @@ var _ = Describe("Preload test", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) url := 
"https://raw.githubusercontent.com/mudler/LocalAI/master/examples/configurations/phi-2.yaml" - fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) + fileName := fmt.Sprintf("%s.yaml", "phi-2") - InstallModels([]config.Gallery{}, "", tmpdir, nil, url) + InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) resultFile := filepath.Join(tmpdir, fileName) @@ -52,7 +52,7 @@ var _ = Describe("Preload test", func() { Expect(err).ToNot(HaveOccurred()) url := "phi-2" - InstallModels([]config.Gallery{}, "", tmpdir, nil, url) + InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) entry, err := os.ReadDir(tmpdir) Expect(err).ToNot(HaveOccurred()) @@ -70,7 +70,7 @@ var _ = Describe("Preload test", func() { url := "mistral-openorca" fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) - InstallModels([]config.Gallery{}, "", tmpdir, nil, url) + InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) resultFile := filepath.Join(tmpdir, fileName) @@ -79,5 +79,19 @@ var _ = Describe("Preload test", func() { Expect(string(content)).To(ContainSubstring("name: mistral-openorca")) }) + It("downloads from urls", func() { + tmpdir, err := os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K") + + err = InstallModels([]config.Gallery{}, "", tmpdir, false, nil, url) + Expect(err).ToNot(HaveOccurred()) + + resultFile := filepath.Join(tmpdir, fileName) + + _, err = os.Stat(resultFile) + Expect(err).ToNot(HaveOccurred()) + }) }) }) diff --git a/swagger/docs.go b/swagger/docs.go index b894258a6e14..513a6dea6af5 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -213,6 +213,43 @@ const docTemplate = `{ } } }, + "/v1/files/{file_id}": { + "get": { + "summary": "Returns information about a specific file.", + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/openai.File" + } + } + } + }, + "delete": { + "summary": "Delete a file.", + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/openai.DeleteStatus" + } + } + } + } + }, + "/v1/files/{file_id}/content": { + "get": { + "summary": "Returns information about a specific file.", + "responses": { + "200": { + "description": "file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/images/generations": { "post": { "summary": "Creates an image given a prompt.", @@ -237,6 +274,19 @@ const docTemplate = `{ } } }, + "/v1/models": { + "get": { + "summary": "List and describe the various models available in the API.", + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.ModelsDataResponse" + } + } + } + } + }, "/v1/text-to-speech/{voice-id}": { "post": { "summary": "Generates audio from the input text.", @@ -491,6 +541,49 @@ const docTemplate = `{ } } }, + "openai.DeleteStatus": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "object": { + "type": "string" + } + } + }, + "openai.File": { + "type": "object", + "properties": { + "bytes": { + "description": "Size of the file in bytes", + "type": "integer" + }, + "created_at": { + "description": "The time at which the file was created", + "type": "string" + }, + "filename": { + "description": "The name of the file", + "type": "string" + }, + "id": { + "description": "Unique identifier for the file", + "type": "string" + }, + "object": { 
+ "description": "Type of the object (e.g., \"file\")", + "type": "string" + }, + "purpose": { + "description": "The purpose of the file (e.g., \"fine-tune\", \"classifications\", etc.)", + "type": "string" + } + } + }, "openai.Tool": { "type": "object", "properties": { @@ -601,6 +694,31 @@ const docTemplate = `{ } } }, + "schema.ModelsDataResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.OpenAIModel" + } + }, + "object": { + "type": "string" + } + } + }, + "schema.OpenAIModel": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "object": { + "type": "string" + } + } + }, "schema.OpenAIRequest": { "type": "object", "required": [ diff --git a/swagger/swagger.json b/swagger/swagger.json index 5e919ef8e74c..aedfd4385e56 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -206,6 +206,43 @@ } } }, + "/v1/files/{file_id}": { + "get": { + "summary": "Returns information about a specific file.", + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/openai.File" + } + } + } + }, + "delete": { + "summary": "Delete a file.", + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/openai.DeleteStatus" + } + } + } + } + }, + "/v1/files/{file_id}/content": { + "get": { + "summary": "Returns information about a specific file.", + "responses": { + "200": { + "description": "file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/images/generations": { "post": { "summary": "Creates an image given a prompt.", @@ -230,6 +267,19 @@ } } }, + "/v1/models": { + "get": { + "summary": "List and describe the various models available in the API.", + "responses": { + "200": { + "description": "Response", + "schema": { + "$ref": "#/definitions/schema.ModelsDataResponse" + } + } + } + } + }, "/v1/text-to-speech/{voice-id}": { "post": { "summary": "Generates audio from the input text.", @@ -484,6 +534,49 @@ } } }, + "openai.DeleteStatus": { + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "object": { + "type": "string" + } + } + }, + "openai.File": { + "type": "object", + "properties": { + "bytes": { + "description": "Size of the file in bytes", + "type": "integer" + }, + "created_at": { + "description": "The time at which the file was created", + "type": "string" + }, + "filename": { + "description": "The name of the file", + "type": "string" + }, + "id": { + "description": "Unique identifier for the file", + "type": "string" + }, + "object": { + "description": "Type of the object (e.g., \"file\")", + "type": "string" + }, + "purpose": { + "description": "The purpose of the file (e.g., \"fine-tune\", \"classifications\", etc.)", + "type": "string" + } + } + }, "openai.Tool": { "type": "object", "properties": { @@ -594,6 +687,31 @@ } } }, + "schema.ModelsDataResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.OpenAIModel" + } + }, + "object": { + "type": "string" + } + } + }, + "schema.OpenAIModel": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "object": { + "type": "string" + } + } + }, "schema.OpenAIRequest": { "type": "object", "required": [ diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 18a51a045783..9c23c8118b40 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -148,6 +148,37 @@ definitions: $ref: 
'#/definitions/openai.Tool' type: array type: object + openai.DeleteStatus: + properties: + deleted: + type: boolean + id: + type: string + object: + type: string + type: object + openai.File: + properties: + bytes: + description: Size of the file in bytes + type: integer + created_at: + description: The time at which the file was created + type: string + filename: + description: The name of the file + type: string + id: + description: Unique identifier for the file + type: string + object: + description: Type of the object (e.g., "file") + type: string + purpose: + description: The purpose of the file (e.g., "fine-tune", "classifications", + etc.) + type: string + type: object openai.Tool: properties: type: @@ -222,6 +253,22 @@ definitions: $ref: '#/definitions/schema.ToolCall' type: array type: object + schema.ModelsDataResponse: + properties: + data: + items: + $ref: '#/definitions/schema.OpenAIModel' + type: array + object: + type: string + type: object + schema.OpenAIModel: + properties: + id: + type: string + object: + type: string + type: object schema.OpenAIRequest: properties: backend: @@ -533,6 +580,29 @@ paths: $ref: '#/definitions/schema.OpenAIResponse' summary: Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + /v1/files/{file_id}: + delete: + responses: + "200": + description: Response + schema: + $ref: '#/definitions/openai.DeleteStatus' + summary: Delete a file. + get: + responses: + "200": + description: Response + schema: + $ref: '#/definitions/openai.File' + summary: Returns information about a specific file. + /v1/files/{file_id}/content: + get: + responses: + "200": + description: file + schema: + type: string + summary: Returns information about a specific file. /v1/images/generations: post: parameters: @@ -548,6 +618,14 @@ paths: schema: $ref: '#/definitions/schema.OpenAIResponse' summary: Creates an image given a prompt. + /v1/models: + get: + responses: + "200": + description: Response + schema: + $ref: '#/definitions/schema.ModelsDataResponse' + summary: List and describe the various models available in the API. /v1/text-to-speech/{voice-id}: post: parameters:
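To round out the new swagger entries, a minimal client sketch that calls the newly documented GET /v1/models endpoint and decodes the schema.ModelsDataResponse shape defined above. The local struct names and the localhost:8080 address are assumptions for illustration, not part of this change.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// Mirrors schema.OpenAIModel from the swagger definitions above.
type openAIModel struct {
	ID     string `json:"id"`
	Object string `json:"object"`
}

// Mirrors schema.ModelsDataResponse from the swagger definitions above.
type modelsDataResponse struct {
	Object string        `json:"object"`
	Data   []openAIModel `json:"data"`
}

func main() {
	// Assumes a LocalAI instance listening locally on port 8080.
	resp, err := http.Get("http://localhost:8080/v1/models")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models modelsDataResponse
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	// Print the ID of every model the instance reports.
	for _, m := range models.Data {
		fmt.Println(m.ID)
	}
}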