Merge branch 'nomic-ai:main' into main
abdulrahman305 authored Sep 1, 2023
2 parents 2afa13a + a69d23e commit 1400c06
Showing 21 changed files with 616 additions and 87 deletions.
55 changes: 45 additions & 10 deletions .circleci/continue_config.yml
@@ -41,10 +41,12 @@ jobs:
- restore_cache: # this is the new step to restore cache
keys:
- linux-qt-cache
- run:
- run:
name: Setup Linux and Dependencies
command: |
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk
- run:
name: Installing Qt
command: |
@@ -92,12 +94,18 @@ jobs:
key: windows-qt-cache
paths:
- C:\Qt
- run:
name: Install VulkanSDK
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Build
command: |
$Env:PATH = "${Env:PATH};C:\Program Files (x86)\Windows Kits\10\bin\x64"
$Env:PATH = "${Env:PATH};C:\Program Files (x86)\Windows Kits\10\bin\10.0.22000.0\x64"
$Env:PATH = "${Env:PATH};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\bin\HostX64\x64"
$Env:PATH = "${Env:PATH};C:\VulkanSDK\1.3.261.1\bin"
$Env:LIB = "${Env:LIB};C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22000.0\ucrt\x64"
$Env:LIB = "${Env:LIB};C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22000.0\um\x64"
$Env:LIB = "${Env:LIB};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\lib\x64"
@@ -117,6 +125,7 @@ jobs:
"-DCMAKE_BUILD_TYPE=Release" `
"-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" `
"-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" `
"-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON" `
"-S ..\gpt4all-chat" `
"-B ."
& "C:\Qt\Tools\Ninja\ninja.exe"
@@ -205,15 +214,20 @@ jobs:
command: aws cloudfront create-invalidation --distribution-id E1STQOW63QL2OH --paths "/*"

build-py-linux:
docker:
- image: circleci/python:3.8
machine:
image: ubuntu-2204:2023.04.2
steps:
- checkout
- run:
name: Set Python Version
command: pyenv global 3.11.2
- run:
name: Install dependencies
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt-get update
sudo apt-get install -y cmake build-essential
sudo apt-get install -y cmake build-essential vulkan-sdk
pip install setuptools wheel cmake
- run:
name: Build C library
@@ -277,9 +291,15 @@ jobs:
- run:
name: Add MinGW64 to PATH
command: $env:Path += ";C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin"
- run:
name: Install VulkanSDK
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install dependencies
command: choco install -y cmake --installargs 'ADD_CMAKE_TO_PATH=System'
command:
choco install -y cmake --installargs 'ADD_CMAKE_TO_PATH=System'
- run:
name: Install Python dependencies
command: pip install setuptools wheel cmake
@@ -291,7 +311,8 @@ jobs:
cd gpt4all-backend
mkdir build
cd build
cmake -G "MinGW Makefiles" ..
$env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
cmake --build . --parallel
- run:
name: Build wheel
@@ -343,8 +364,10 @@ jobs:
- run:
name: Install dependencies
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt-get update
sudo apt-get install -y cmake build-essential
sudo apt-get install -y cmake build-essential vulkan-sdk
- run:
name: Build Libraries
command: |
@@ -407,6 +430,11 @@ jobs:
- run:
name: Install MinGW64
command: choco install -y mingw --force --no-progress
- run:
name: Install VulkanSDK
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install dependencies
command: |
@@ -417,10 +445,11 @@ jobs:
$MinGWBin = "C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin"
$Env:Path += ";$MinGwBin"
$Env:Path += ";C:\Program Files\CMake\bin"
$Env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
cd gpt4all-backend
mkdir runtimes/win-x64
cd runtimes/win-x64
cmake -G "MinGW Makefiles" ../..
cmake -G "MinGW Makefiles" -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON ../..
cmake --build . --parallel --config Release
cp "$MinGWBin\libgcc*.dll" .
cp "$MinGWBin\libstdc++*.dll" .
@@ -443,6 +472,11 @@ jobs:
command: |
git submodule sync
git submodule update --init --recursive
- run:
name: Install VulkanSDK
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install dependencies
command: |
@@ -451,10 +485,11 @@ jobs:
name: Build Libraries
command: |
$Env:Path += ";C:\Program Files\CMake\bin"
$Env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
cd gpt4all-backend
mkdir runtimes/win-x64_msvc
cd runtimes/win-x64_msvc
cmake -G "Visual Studio 17 2022" -A X64 ../..
cmake -G "Visual Studio 17 2022" -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -A X64 ../..
cmake --build . --parallel --config Release
cp bin/Release/*.dll .
- persist_to_workspace:
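Several of the jobs above follow the same pattern: install LunarG Vulkan SDK 1.3.261.1 (via the jammy apt repository on Linux, the standalone installer on Windows), put its bin directory on PATH, and configure with -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON, which, judging by its name, relaxes Kompute's check of the installed Vulkan version. A quick way to see which instance version a machine's loader actually reports is a probe like the following; this is an illustration, not code from the repository:

#include <vulkan/vulkan.h>
#include <cstdio>

int main() {
    uint32_t version = VK_API_VERSION_1_0; // assume 1.0 when the query is unavailable
    // vkEnumerateInstanceVersion only exists on Vulkan 1.1+ loaders, so resolve it dynamically.
    auto enumerate = (PFN_vkEnumerateInstanceVersion)
        vkGetInstanceProcAddr(NULL, "vkEnumerateInstanceVersion");
    if (enumerate)
        enumerate(&version);
    std::printf("Vulkan instance version: %u.%u.%u\n",
                VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version));
    return 0;
}

Build with g++ probe.cpp -lvulkan on Linux, or link vulkan-1.lib from the SDK on Windows.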
2 changes: 1 addition & 1 deletion .codespellrc
@@ -1,3 +1,3 @@
[codespell]
ignore-words-list = blong, belong, afterall
ignore-words-list = blong, belong, afterall, som
skip = .git,*.pdf,*.svg,*.lock
30 changes: 30 additions & 0 deletions LICENSE_SOM.txt
@@ -0,0 +1,30 @@
Software for Open Models License (SOM)
Version 1.0 dated August 30th, 2023

This license governs use of the accompanying Software. If you use the Software, you accept this license. If you do not accept the license, do not use the Software.

This license is intended to encourage open release of models created, modified, processed, or otherwise used via the Software under open licensing terms, and should be interpreted in light of that intent.

1. Definitions
The “Licensor” is the person or entity who is making the Software available under this license. “Software” is the software made available by Licensor under this license.
A “Model” is the output of a machine learning algorithm, and excludes the Software.
“Model Source Materials” must include the Model and model weights, and may include any input data, input data descriptions, documentation or training descriptions for the Model.
“Open Licensing Terms” means: (a) any open source license approved by the Open Source Initiative, or (b) any other terms that make the Model Source Materials publicly available free of charge, and allow recipients to use, modify and distribute the Model Source Materials. Terms described in (b) may include reasonable restrictions such as non-commercial or non-production limitations, or require use in compliance with law.

2. Grant of Rights. Subject to the conditions and limitations in section 3:
(A) Copyright Grant. Licensor grants you a non-exclusive, worldwide, royalty-free copyright license to copy, modify, and distribute the Software and any modifications of the Software you create under this license. The foregoing license includes without limitation the right to create, modify, and use Models using this Software.

(B) Patent Grant. Licensor grants you a non-exclusive, worldwide, royalty-free license, under any patents owned or controlled by Licensor, to make, have made, use, sell, offer for sale, import, or otherwise exploit the Software. No license is granted to patent rights that are not embodied in the operation of the Software in the form provided by Licensor.

3. Conditions and Limitations
(A) Model Licensing and Access. If you use the Software to create, modify, process, or otherwise use any Model, including usage to create inferences with a Model, whether or not you make the Model available to others, you must make that Model's Source Materials publicly available under Open Licensing Terms.

(B) No Re-Licensing. If you redistribute the Software, or modifications to the Software made under the license granted above, you must make it available only under the terms of this license. You may offer additional terms such as warranties, maintenance and support, but You, and not Licensor, are responsible for performing such terms.

(C) No Trademark License. This license does not grant you rights to use the Licensor’s name, logo, or trademarks.

(D) If you assert in writing a claim against any person or entity alleging that the use of the Software infringes any patent, all of your licenses to the Software under Section 2 end automatically as of the date you asserted the claim.

(E) If you distribute any portion of the Software, you must retain all copyright, patent, trademark, and attribution notices that are present in the Software, and you must include a copy of this license.

(F) The Software is licensed “as-is.” You bear the entire risk of using it. Licensor gives You no express warranties, guarantees or conditions. You may have additional consumer rights under your local laws that this license cannot change. To the extent permitted under your local laws, the Licensor disclaims and excludes the implied warranties of merchantability, fitness for a particular purpose and non-infringement. To the extent this disclaimer is unlawful, you, and not Licensor, are responsible for any liability.
41 changes: 17 additions & 24 deletions gpt4all-backend/CMakeLists.txt
@@ -20,7 +20,7 @@ endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

set(LLMODEL_VERSION_MAJOR 0)
set(LLMODEL_VERSION_MINOR 3)
set(LLMODEL_VERSION_MINOR 4)
set(LLMODEL_VERSION_PATCH 0)
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
@@ -39,6 +39,10 @@ else()
message(STATUS "Interprocedural optimization support detected")
endif()

if(NOT APPLE)
set(LLAMA_KOMPUTE YES)
endif()

include(llama.cpp.cmake)

set(BUILD_VARIANTS default avxonly)
@@ -69,11 +73,6 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Include GGML
set(LLAMA_K_QUANTS YES)
include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
if (NOT LLAMA_METAL)
set(LLAMA_K_QUANTS NO)
include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
endif()

# Function for preparing individual implementations
function(prepare_target TARGET_NAME BASE_LIB)
@@ -100,38 +99,32 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)

add_library(replit-mainline-${BUILD_VARIANT} SHARED
replit.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
target_compile_definitions(replit-mainline-${BUILD_VARIANT} PRIVATE LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(replit-mainline llama-mainline)

if (NOT LLAMA_METAL)
add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
LLAMA_VERSIONS===2 LLAMA_DATE=230519)
prepare_target(llamamodel-230519 llama-230519)
add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE
LLAMA_VERSIONS=<=1 LLAMA_DATE=230511)
prepare_target(llamamodel-230511 llama-230511)

add_library(gptj-${BUILD_VARIANT} SHARED
gptj.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(gptj ggml-230511)
# FIXME: These need to be forward ported to latest ggml
# add_library(gptj-${BUILD_VARIANT} SHARED
# gptj.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
# prepare_target(gptj ggml-230511)

add_library(falcon-${BUILD_VARIANT} SHARED
falcon.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
target_compile_definitions(falcon-${BUILD_VARIANT} PRIVATE LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(falcon llama-mainline)

add_library(mpt-${BUILD_VARIANT} SHARED
mpt.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(mpt ggml-230511)
# FIXME: These need to be forward ported to latest ggml
# add_library(mpt-${BUILD_VARIANT} SHARED
# mpt.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
# prepare_target(mpt ggml-230511)

add_library(bert-${BUILD_VARIANT} SHARED
bert.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
target_compile_definitions(bert-${BUILD_VARIANT} PRIVATE LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(bert llama-mainline)

add_library(starcoder-${BUILD_VARIANT} SHARED
starcoder.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
target_compile_definitions(starcoder-${BUILD_VARIANT} PRIVATE LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(starcoder llama-mainline)
endif()
endforeach()
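A side note on the LLAMA_VERSIONS=>=3 / ===2 / =<=1 definitions above: each macro's replacement text is a bare comparison operator plus operand, so a source file can paste it directly after a version variable. A minimal sketch of that idiom (an assumption about how the model sources consume it; the consuming code is not part of this diff):

#include <cstdio>

// Hypothetical default; the real builds pass e.g. -DLLAMA_VERSIONS=>=3 -DLLAMA_DATE=999999.
#ifndef LLAMA_VERSIONS
#define LLAMA_VERSIONS >= 3
#endif

static bool version_supported(int fileVersion) {
    return fileVersion LLAMA_VERSIONS; // expands to: fileVersion >= 3
}

int main() {
    for (int v = 1; v <= 4; ++v)
        std::printf("ggml file version %d supported: %s\n", v, version_supported(v) ? "yes" : "no");
    return 0;
}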
31 changes: 9 additions & 22 deletions gpt4all-backend/bert.cpp
@@ -1,5 +1,6 @@
#define BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#include "bert_impl.h"
#include "llmodel_shared.h"
#include "ggml.h"

#include <cassert>
@@ -91,22 +92,6 @@ struct bert_model
};

// Replacement for std::vector<uint8_t> that doesn't require zero-initialization.
struct bert_buffer {
uint8_t * data = NULL;
size_t size = 0;

void resize(size_t size) {
delete[] data;
data = new uint8_t[size];
this->size = size;
}

~bert_buffer() {
delete[] data;
}
};


struct bert_ctx
{
bert_model model;
@@ -115,7 +100,8 @@ struct bert_ctx
size_t mem_per_token;
int64_t mem_per_input;
int32_t max_batch_n;
bert_buffer buf_compute;
llm_buffer buf_compute;
llm_buffer work_buf;
};

int32_t bert_n_embd(bert_ctx * ctx)
@@ -328,13 +314,12 @@ void bert_eval(

struct ggml_init_params params = {
.mem_size = buf_compute.size,
.mem_buffer = buf_compute.data,
.mem_buffer = buf_compute.addr,
.no_alloc = false,
};

struct ggml_context *ctx0 = ggml_init(params);
struct ggml_cgraph gf = {};
gf.n_threads = n_threads;

// Embeddings. word_embeddings + token_type_embeddings + position_embeddings
struct ggml_tensor *token_layer = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -466,7 +451,9 @@ void bert_eval(
ggml_tensor *output = inpL;
// run the computation
ggml_build_forward_expand(&gf, output);
ggml_graph_compute(ctx0, &gf);
//ggml_graph_compute_g4a()
ggml_graph_compute_g4a(ctx->work_buf, &gf, n_threads);
//ggml_graph_compute(ctx0, &gf);


// float *dat = ggml_get_data_f32(output);
@@ -633,7 +620,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
model_mem_req += n_layer * (n_intermediate * ggml_type_sizef(GGML_TYPE_F32)); // ff_i_b
model_mem_req += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ff_o_b

model_mem_req += (5 + 16 * n_layer) * 256; // object overhead
model_mem_req += (5 + 16 * n_layer) * ggml_tensor_overhead(); // object overhead

#if defined(DEBUG_BERT)
printf("%s: ggml ctx size = %6.2f MB\n", __func__, model_mem_req / (1024.0 * 1024.0));
@@ -1063,4 +1050,4 @@ DLL_EXPORT bool magic_match(std::istream& f) {
DLL_EXPORT LLModel *construct() {
return new Bert;
}
}
}
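The bert.cpp changes are a template for the forward-porting work the CMake FIXMEs mention: the hand-rolled bert_buffer gives way to the shared llm_buffer (hence buf_compute.addr instead of .data), the per-graph gf.n_threads field is gone, and execution goes through ggml_graph_compute_g4a with a reusable work buffer. Newer ggml plans a graph first and expects the caller to provide scratch memory, so a helper like that plausibly looks as follows (a sketch, assuming llm_buffer exposes addr, size, and resize() as the usage above suggests):

#include "ggml.h"
#include "llmodel_shared.h"

void ggml_graph_compute_g4a(llm_buffer &work, struct ggml_cgraph *graph, int n_threads) {
    // Ask ggml how much scratch space this graph needs at the given thread count.
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
    if (plan.work_size > 0) {
        if (work.size < plan.work_size)
            work.resize(plan.work_size); // grows only when a larger graph comes along
        plan.work_data = work.addr;
    }
    ggml_graph_compute(graph, &plan);
}

Keeping the buffer in bert_ctx (the new work_buf member) means the allocation is reused across bert_eval calls instead of being redone per evaluation.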
2 changes: 1 addition & 1 deletion gpt4all-backend/dlhandle.h
@@ -75,7 +75,7 @@ class Dlhandle {

Dlhandle() : chandle(nullptr) {}
Dlhandle(const std::string& fpath) {
chandle = LoadLibraryA(fpath.c_str());
chandle = LoadLibraryExA(fpath.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
if (!chandle) {
throw Exception("dlopen(\""+fpath+"\"): Error");
}
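The one-line dlhandle.h change swaps LoadLibraryA for LoadLibraryExA with LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR. With those flags, Windows resolves the loaded DLL's own dependencies from that DLL's directory plus the standard system locations, rather than the legacy search order that consults the current directory and PATH; this lets implementation DLLs ship next to the libraries they depend on. One caveat: these flags require a fully qualified path. A standalone illustration (the DLL path here is made up):

#include <windows.h>
#include <cstdio>

int main() {
    // Hypothetical path; the flags below reject relative paths.
    HMODULE h = LoadLibraryExA("C:\\app\\backends\\llamamodel-mainline-default.dll",
                               NULL,
                               LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
    if (!h) {
        std::printf("LoadLibraryExA failed: error %lu\n", GetLastError());
        return 1;
    }
    FreeLibrary(h);
    return 0;
}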