From d948a4f2ee88922b2861b19eb7e7660921f7bf67 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 7 Feb 2024 09:37:59 -0500 Subject: [PATCH 01/17] Complete revamp of model loading to allow for more discreet control by the user of the models loading behavior. Signed-off-by: Adam Treat --- gpt4all-backend/llamamodel.cpp | 3 + gpt4all-backend/llmodel.h | 13 + gpt4all-chat/CMakeLists.txt | 3 + gpt4all-chat/chat.cpp | 57 ++-- gpt4all-chat/chat.h | 18 +- gpt4all-chat/chatlistmodel.h | 4 +- gpt4all-chat/chatllm.cpp | 93 ++++++- gpt4all-chat/chatllm.h | 11 +- gpt4all-chat/icons/eject.svg | 6 + gpt4all-chat/main.qml | 372 ++++++++++++++++--------- gpt4all-chat/qml/MyButton.qml | 5 +- gpt4all-chat/qml/MyMiniButton.qml | 47 ++++ gpt4all-chat/qml/SwitchModelDialog.qml | 44 +++ gpt4all-chat/qml/Theme.qml | 1 + 14 files changed, 504 insertions(+), 173 deletions(-) create mode 100644 gpt4all-chat/icons/eject.svg create mode 100644 gpt4all-chat/qml/MyMiniButton.qml create mode 100644 gpt4all-chat/qml/SwitchModelDialog.qml diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index 5b9960fff1c1..0dd9de5d96ed 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -180,6 +180,9 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) d_ptr->model_params.use_mlock = params.use_mlock; #endif + d_ptr->model_params.progress_callback = &LLModel::staticProgressCallback; + d_ptr->model_params.progress_callback_user_data = this; + #ifdef GGML_USE_METAL if (llama_verbose()) { std::cerr << "llama.cpp: using Metal" << std::endl; diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h index 7fc5e71dc902..c3cc937c0f72 100644 --- a/gpt4all-backend/llmodel.h +++ b/gpt4all-backend/llmodel.h @@ -74,6 +74,8 @@ class LLModel { int32_t n_last_batch_tokens = 0; }; + using ProgressCallback = std::function; + explicit LLModel() {} virtual ~LLModel() {} @@ -125,6 +127,8 @@ class LLModel { virtual bool hasGPUDevice() { return false; } virtual bool usingGPUDevice() { return false; } + void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; } + protected: // These are pure virtual because subclasses need to implement as the default implementation of // 'prompt' above calls these functions @@ -153,6 +157,15 @@ class LLModel { const Implementation *m_implementation = nullptr; + ProgressCallback m_progressCallback; + static bool staticProgressCallback(float progress, void* ctx) + { + LLModel* model = static_cast(ctx); + if (model && model->m_progressCallback) + return model->m_progressCallback(progress); + return true; + } + private: friend class LLMImplementation; }; diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index ee72f8463e42..0f9d0ab0f2e9 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -109,6 +109,7 @@ qt_add_qml_module(chat qml/ModelSettings.qml qml/ApplicationSettings.qml qml/LocalDocsSettings.qml + qml/SwitchModelDialog.qml qml/MySettingsTab.qml qml/MySettingsStack.qml qml/MySettingsDestructiveButton.qml @@ -123,6 +124,7 @@ qt_add_qml_module(chat qml/MyTextField.qml qml/MyCheckBox.qml qml/MyBusyIndicator.qml + qml/MyMiniButton.qml qml/MyToolButton.qml RESOURCES icons/send_message.svg @@ -133,6 +135,7 @@ qt_add_qml_module(chat icons/db.svg icons/download.svg icons/settings.svg + icons/eject.svg icons/edit.svg icons/image.svg icons/trash.svg diff --git a/gpt4all-chat/chat.cpp b/gpt4all-chat/chat.cpp index 0e66c5c20bfd..8730adbcee05 100644 --- 
a/gpt4all-chat/chat.cpp +++ b/gpt4all-chat/chat.cpp @@ -23,14 +23,10 @@ Chat::Chat(bool isServer, QObject *parent) , m_id(Network::globalInstance()->generateUniqueId()) , m_name(tr("Server Chat")) , m_chatModel(new ChatModel(this)) - , m_responseInProgress(false) , m_responseState(Chat::ResponseStopped) , m_creationDate(QDateTime::currentSecsSinceEpoch()) , m_llmodel(new Server(this)) , m_isServer(true) - , m_shouldDeleteLater(false) - , m_isModelLoaded(false) - , m_shouldLoadModelWhenInstalled(false) , m_collectionModel(new LocalDocsCollectionsModel(this)) { connectLLM(); @@ -45,7 +41,7 @@ Chat::~Chat() void Chat::connectLLM() { // Should be in different threads - connect(m_llmodel, &ChatLLM::isModelLoadedChanged, this, &Chat::handleModelLoadedChanged, Qt::QueuedConnection); + connect(m_llmodel, &ChatLLM::modelLoadingPercentageChanged, this, &Chat::handleModelLoadingPercentageChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::responseChanged, this, &Chat::handleResponseChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::promptProcessing, this, &Chat::promptProcessing, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::responseStopped, this, &Chat::responseStopped, Qt::QueuedConnection); @@ -57,6 +53,7 @@ void Chat::connectLLM() connect(m_llmodel, &ChatLLM::reportFallbackReason, this, &Chat::handleFallbackReasonChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection); + connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::trySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection); connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection); connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection); @@ -69,8 +66,6 @@ void Chat::connectLLM() connect(this, &Chat::processSystemPromptRequested, m_llmodel, &ChatLLM::processSystemPrompt, Qt::QueuedConnection); connect(this, &Chat::collectionListChanged, m_collectionModel, &LocalDocsCollectionsModel::setCollections); - connect(ModelList::globalInstance()->installedModels(), &InstalledModels::countChanged, - this, &Chat::handleModelInstalled, Qt::QueuedConnection); } void Chat::reset() @@ -101,7 +96,12 @@ void Chat::processSystemPrompt() bool Chat::isModelLoaded() const { - return m_isModelLoaded; + return m_modelLoadingPercentage == 1.0f; +} + +float Chat::modelLoadingPercentage() const +{ + return m_modelLoadingPercentage; } void Chat::resetResponseState() @@ -158,16 +158,18 @@ void Chat::handleResponseChanged(const QString &response) emit responseChanged(); } -void Chat::handleModelLoadedChanged(bool loaded) +void Chat::handleModelLoadingPercentageChanged(float loadingPercentage) { if (m_shouldDeleteLater) deleteLater(); - if (loaded == m_isModelLoaded) + if (loadingPercentage == m_modelLoadingPercentage) return; - m_isModelLoaded = loaded; - emit isModelLoadedChanged(); + m_modelLoadingPercentage = loadingPercentage; + emit modelLoadingPercentageChanged(); + if (m_modelLoadingPercentage == 1.0f || m_modelLoadingPercentage == 0.0f) + emit isModelLoadedChanged(); } void Chat::promptProcessing() @@ -238,10 +240,10 @@ ModelInfo Chat::modelInfo() const void Chat::setModelInfo(const ModelInfo &modelInfo) { - if (m_modelInfo == modelInfo) + if (m_modelInfo == modelInfo && isModelLoaded()) return; - m_isModelLoaded = false; + 
m_modelLoadingPercentage = std::numeric_limits::min(); emit isModelLoadedChanged(); m_modelLoadingError = QString(); emit modelLoadingErrorChanged(); @@ -291,21 +293,26 @@ void Chat::unloadModel() void Chat::reloadModel() { - // If the installed model list is empty, then we mark a special flag and monitor for when a model - // is installed - if (!ModelList::globalInstance()->installedModels()->count()) { - m_shouldLoadModelWhenInstalled = true; - return; - } m_llmodel->setShouldBeLoaded(true); } -void Chat::handleModelInstalled() +void Chat::forceUnloadModel() { - if (!m_shouldLoadModelWhenInstalled) - return; - m_shouldLoadModelWhenInstalled = false; - reloadModel(); + stopGenerating(); + m_llmodel->setForceUnloadModel(true); + m_llmodel->setShouldBeLoaded(false); +} + +void Chat::forceReloadModel() +{ + m_llmodel->setForceUnloadModel(true); + m_llmodel->setShouldBeLoaded(true); +} + +void Chat::trySwitchContextOfLoadedModel() +{ + emit trySwitchContextOfLoadedModelAttempted(); + m_llmodel->setShouldTrySwitchContext(true); } void Chat::generatedNameChanged(const QString &name) diff --git a/gpt4all-chat/chat.h b/gpt4all-chat/chat.h index ae6910bf8f2a..cecbcbda9d39 100644 --- a/gpt4all-chat/chat.h +++ b/gpt4all-chat/chat.h @@ -17,6 +17,7 @@ class Chat : public QObject Q_PROPERTY(QString name READ name WRITE setName NOTIFY nameChanged) Q_PROPERTY(ChatModel *chatModel READ chatModel NOTIFY chatModelChanged) Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged) + Q_PROPERTY(float modelLoadingPercentage READ modelLoadingPercentage NOTIFY modelLoadingPercentageChanged) Q_PROPERTY(QString response READ response NOTIFY responseChanged) Q_PROPERTY(ModelInfo modelInfo READ modelInfo WRITE setModelInfo NOTIFY modelInfoChanged) Q_PROPERTY(bool responseInProgress READ responseInProgress NOTIFY responseInProgressChanged) @@ -61,6 +62,7 @@ class Chat : public QObject Q_INVOKABLE void reset(); Q_INVOKABLE void processSystemPrompt(); Q_INVOKABLE bool isModelLoaded() const; + Q_INVOKABLE float modelLoadingPercentage() const; Q_INVOKABLE void prompt(const QString &prompt); Q_INVOKABLE void regenerateResponse(); Q_INVOKABLE void stopGenerating(); @@ -75,8 +77,11 @@ class Chat : public QObject void setModelInfo(const ModelInfo &modelInfo); bool isRecalc() const; - void unloadModel(); - void reloadModel(); + Q_INVOKABLE void unloadModel(); + Q_INVOKABLE void reloadModel(); + Q_INVOKABLE void forceUnloadModel(); + Q_INVOKABLE void forceReloadModel(); + Q_INVOKABLE void trySwitchContextOfLoadedModel(); void unloadAndDeleteLater(); qint64 creationDate() const { return m_creationDate; } @@ -106,6 +111,7 @@ public Q_SLOTS: void nameChanged(); void chatModelChanged(); void isModelLoadedChanged(); + void modelLoadingPercentageChanged(); void responseChanged(); void responseInProgressChanged(); void responseStateChanged(); @@ -127,10 +133,12 @@ public Q_SLOTS: void deviceChanged(); void fallbackReasonChanged(); void collectionModelChanged(); + void trySwitchContextOfLoadedModelAttempted(); + void trySwitchContextOfLoadedModelCompleted(bool); private Q_SLOTS: void handleResponseChanged(const QString &response); - void handleModelLoadedChanged(bool); + void handleModelLoadingPercentageChanged(float); void promptProcessing(); void responseStopped(); void generatedNameChanged(const QString &name); @@ -141,7 +149,6 @@ private Q_SLOTS: void handleFallbackReasonChanged(const QString &device); void handleDatabaseResultsChanged(const QList &results); void handleModelInfoChanged(const ModelInfo 
&modelInfo); - void handleModelInstalled(); private: QString m_id; @@ -163,8 +170,7 @@ private Q_SLOTS: QList m_databaseResults; bool m_isServer = false; bool m_shouldDeleteLater = false; - bool m_isModelLoaded = false; - bool m_shouldLoadModelWhenInstalled = false; + float m_modelLoadingPercentage = 0.0f; LocalDocsCollectionsModel *m_collectionModel; }; diff --git a/gpt4all-chat/chatlistmodel.h b/gpt4all-chat/chatlistmodel.h index 3f99c622894e..ed04cc7a4476 100644 --- a/gpt4all-chat/chatlistmodel.h +++ b/gpt4all-chat/chatlistmodel.h @@ -179,9 +179,9 @@ class ChatListModel : public QAbstractListModel if (m_currentChat && m_currentChat != m_serverChat) m_currentChat->unloadModel(); m_currentChat = chat; - if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat) - m_currentChat->reloadModel(); emit currentChatChanged(); + if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat) + m_currentChat->trySwitchContextOfLoadedModel(); } Q_INVOKABLE Chat* get(int index) diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index 844942e44399..4b456e3464a0 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -62,7 +62,9 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer) , m_promptResponseTokens(0) , m_promptTokens(0) , m_isRecalc(false) - , m_shouldBeLoaded(true) + , m_shouldBeLoaded(false) + , m_forceUnloadModel(false) + , m_shouldTrySwitchContext(false) , m_stopGenerating(false) , m_timer(nullptr) , m_isServer(isServer) @@ -76,6 +78,8 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer) connect(this, &ChatLLM::sendModelLoaded, Network::globalInstance(), &Network::sendModelLoaded); connect(this, &ChatLLM::shouldBeLoadedChanged, this, &ChatLLM::handleShouldBeLoadedChanged, Qt::QueuedConnection); // explicitly queued + connect(this, &ChatLLM::shouldTrySwitchContextChanged, this, &ChatLLM::handleShouldTrySwitchContextChanged, + Qt::QueuedConnection); // explicitly queued connect(parent, &Chat::idChanged, this, &ChatLLM::handleChatIdChanged); connect(&m_llmThread, &QThread::started, this, &ChatLLM::handleThreadStarted); connect(MySettings::globalInstance(), &MySettings::forceMetalChanged, this, &ChatLLM::handleForceMetalChanged); @@ -143,6 +147,54 @@ bool ChatLLM::loadDefaultModel() return loadModel(defaultModel); } +bool ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo) +{ + // We're trying to see if the store already has the model fully loaded that we wish to use + // and if so we just acquire it from the store and switch the context and return true. If the + // store doesn't have it or we're already loaded or in any other case just return false. 
+ + // If we're already loaded or a server or we're reloading to change the variant/device or the + // modelInfo is empty, then this should fail + if (isModelLoaded() || m_isServer || m_reloadingToChangeVariant || modelInfo.name().isEmpty()) { + m_shouldTrySwitchContext = false; + emit trySwitchContextOfLoadedModelCompleted(false); + return false; + } + + QString filePath = modelInfo.dirpath + modelInfo.filename(); + QFileInfo fileInfo(filePath); + + m_llModelInfo = LLModelStore::globalInstance()->acquireModel(); +#if defined(DEBUG_MODEL_LOADING) + qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model; +#endif + + // The store gave us no already loaded model, the wrong type of model, then give it back to the + // store and fail + if (!m_llModelInfo.model || m_llModelInfo.fileInfo != fileInfo) { + LLModelStore::globalInstance()->releaseModel(m_llModelInfo); + m_llModelInfo = LLModelInfo(); + m_shouldTrySwitchContext = false; + emit trySwitchContextOfLoadedModelCompleted(false); + return false; + } + +#if defined(DEBUG_MODEL_LOADING) + qDebug() << "store had our model" << m_llmThread.objectName() << m_llModelInfo.model; +#endif + + // We should be loaded and now we are + m_shouldBeLoaded = true; + m_shouldTrySwitchContext = false; + + // Restore, signal and process + restoreState(); + emit modelLoadingPercentageChanged(1.0f); + emit trySwitchContextOfLoadedModelCompleted(true); + processSystemPrompt(); + return true; +} + bool ChatLLM::loadModel(const ModelInfo &modelInfo) { // This is a complicated method because N different possible threads are interested in the outcome @@ -170,7 +222,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) #endif delete m_llModelInfo.model; m_llModelInfo.model = nullptr; - emit isModelLoadedChanged(false); + emit modelLoadingPercentageChanged(std::numeric_limits::min()); } else if (!m_isServer) { // This is a blocking call that tries to retrieve the model we need from the model store. // If it succeeds, then we just have to restore state. If the store has never had a model @@ -188,7 +240,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) #endif LLModelStore::globalInstance()->releaseModel(m_llModelInfo); m_llModelInfo = LLModelInfo(); - emit isModelLoadedChanged(false); + emit modelLoadingPercentageChanged(0.0f); return false; } @@ -198,7 +250,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) qDebug() << "store had our model" << m_llmThread.objectName() << m_llModelInfo.model; #endif restoreState(); - emit isModelLoadedChanged(true); + emit modelLoadingPercentageChanged(1.0f); setModelInfo(modelInfo); Q_ASSERT(!m_modelInfo.filename().isEmpty()); if (m_modelInfo.filename().isEmpty()) @@ -261,6 +313,12 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), buildVariant, n_ctx); if (m_llModelInfo.model) { + + m_llModelInfo.model->setProgressCallback([this](float progress) -> bool { + emit modelLoadingPercentageChanged(progress); + return m_shouldBeLoaded; + }); + // Update the settings that a model is being loaded and update the device list MySettings::globalInstance()->setAttemptModelLoad(filePath); @@ -354,7 +412,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) qDebug() << "modelLoadedChanged" << m_llmThread.objectName(); fflush(stdout); #endif - emit isModelLoadedChanged(isModelLoaded()); + emit modelLoadingPercentageChanged(isModelLoaded() ? 
1.0f : 0.0f); static bool isFirstLoad = true; if (isFirstLoad) { @@ -456,6 +514,7 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo) void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo) { + m_shouldBeLoaded = true; loadModel(modelInfo); } @@ -598,6 +657,12 @@ void ChatLLM::setShouldBeLoaded(bool b) emit shouldBeLoadedChanged(); } +void ChatLLM::setShouldTrySwitchContext(bool b) +{ + m_shouldTrySwitchContext = b; // atomic + emit shouldTrySwitchContextChanged(); +} + void ChatLLM::handleShouldBeLoadedChanged() { if (m_shouldBeLoaded) @@ -606,10 +671,10 @@ void ChatLLM::handleShouldBeLoadedChanged() unloadModel(); } -void ChatLLM::forceUnloadModel() +void ChatLLM::handleShouldTrySwitchContextChanged() { - m_shouldBeLoaded = false; // atomic - unloadModel(); + if (m_shouldTrySwitchContext) + trySwitchContextOfLoadedModel(modelInfo()); } void ChatLLM::unloadModel() @@ -617,17 +682,27 @@ void ChatLLM::unloadModel() if (!isModelLoaded() || m_isServer) return; + emit modelLoadingPercentageChanged(0.0f); saveState(); #if defined(DEBUG_MODEL_LOADING) qDebug() << "unloadModel" << m_llmThread.objectName() << m_llModelInfo.model; #endif + + if (m_forceUnloadModel) { + delete m_llModelInfo.model; + m_llModelInfo.model = nullptr; + m_forceUnloadModel = false; + } + LLModelStore::globalInstance()->releaseModel(m_llModelInfo); m_llModelInfo = LLModelInfo(); - emit isModelLoadedChanged(false); } void ChatLLM::reloadModel() { + if (isModelLoaded() && m_forceUnloadModel) + unloadModel(); // we unload first if we are forcing an unload + if (isModelLoaded() || m_isServer) return; diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h index d6af4cb0c427..278e79cc0b82 100644 --- a/gpt4all-chat/chatllm.h +++ b/gpt4all-chat/chatllm.h @@ -81,6 +81,8 @@ class ChatLLM : public QObject bool shouldBeLoaded() const { return m_shouldBeLoaded; } void setShouldBeLoaded(bool b); + void setShouldTrySwitchContext(bool b); + void setForceUnloadModel(bool b) { m_forceUnloadModel = b; } QString response() const; @@ -98,14 +100,15 @@ class ChatLLM : public QObject public Q_SLOTS: bool prompt(const QList &collectionList, const QString &prompt); bool loadDefaultModel(); + bool trySwitchContextOfLoadedModel(const ModelInfo &modelInfo); bool loadModel(const ModelInfo &modelInfo); void modelChangeRequested(const ModelInfo &modelInfo); - void forceUnloadModel(); void unloadModel(); void reloadModel(); void generateName(); void handleChatIdChanged(const QString &id); void handleShouldBeLoadedChanged(); + void handleShouldTrySwitchContextChanged(); void handleThreadStarted(); void handleForceMetalChanged(bool forceMetal); void handleDeviceChanged(); @@ -114,7 +117,7 @@ public Q_SLOTS: Q_SIGNALS: void recalcChanged(); - void isModelLoadedChanged(bool); + void modelLoadingPercentageChanged(float); void modelLoadingError(const QString &error); void responseChanged(const QString &response); void promptProcessing(); @@ -125,6 +128,8 @@ public Q_SLOTS: void stateChanged(); void threadStarted(); void shouldBeLoadedChanged(); + void shouldTrySwitchContextChanged(); + void trySwitchContextOfLoadedModelCompleted(bool); void requestRetrieveFromDB(const QList &collections, const QString &text, int retrievalSize, QList *results); void reportSpeed(const QString &speed); void reportDevice(const QString &device); @@ -167,7 +172,9 @@ public Q_SLOTS: QThread m_llmThread; std::atomic m_stopGenerating; std::atomic m_shouldBeLoaded; + std::atomic m_shouldTrySwitchContext; std::atomic m_isRecalc; + std::atomic m_forceUnloadModel; bool 
m_isServer; bool m_forceMetal; bool m_reloadingToChangeVariant; diff --git a/gpt4all-chat/icons/eject.svg b/gpt4all-chat/icons/eject.svg new file mode 100644 index 000000000000..9649c4876d5b --- /dev/null +++ b/gpt4all-chat/icons/eject.svg @@ -0,0 +1,6 @@ + + + diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index 72fbc3b8e19a..66104e37e754 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -126,6 +126,10 @@ Window { } } + function currentModelName() { + return ModelList.modelInfo(currentChat.modelInfo.id).name; + } + PopupDialog { id: errorCompatHardware anchors.centerIn: parent @@ -282,6 +286,18 @@ Window { } } + SwitchModelDialog { + id: switchModelDialog + anchors.centerIn: parent + width: Math.min(1024, window.width - (window.width * .2)) + height: Math.min(600, window.height - (window.height * .2)) + Item { + Accessible.role: Accessible.Dialog + Accessible.name: qsTr("Switch model dialog") + Accessible.description: qsTr("Warn the user if they switch models, then context will be erased") + } + } + Rectangle { id: header anchors.left: parent.left @@ -292,7 +308,9 @@ Window { Item { anchors.centerIn: parent height: childrenRect.height - visible: currentChat.isModelLoaded || currentChat.modelLoadingError !== "" || currentChat.isServer + visible: true + || currentChat.modelLoadingError !== "" + || currentChat.isServer Label { id: modelLabel @@ -306,102 +324,168 @@ Window { horizontalAlignment: TextInput.AlignRight } - MyComboBox { - id: comboBox - implicitWidth: 375 - width: window.width >= 750 ? implicitWidth : implicitWidth - ((750 - window.width)) + RowLayout { + id: comboLayout anchors.top: modelLabel.top anchors.bottom: modelLabel.bottom anchors.horizontalCenter: parent.horizontalCenter anchors.horizontalCenterOffset: window.width >= 950 ? 0 : Math.max(-((950 - window.width) / 2), -99.5) - enabled: !currentChat.isServer - model: ModelList.installedModels - valueRole: "id" - textRole: "name" - property string currentModelName: "" - function updateCurrentModelName() { - var info = ModelList.modelInfo(currentChat.modelInfo.id); - comboBox.currentModelName = info.name; - } - Connections { - target: currentChat - function onModelInfoChanged() { - comboBox.updateCurrentModelName(); + spacing: 20 + + MyComboBox { + id: comboBox + Layout.fillWidth: true + Layout.fillHeight: true + implicitWidth: 575 + width: window.width >= 750 ? 
implicitWidth : implicitWidth - ((750 - window.width)) + enabled: !currentChat.isServer + model: ModelList.installedModels + valueRole: "id" + textRole: "name" + property bool isCurrentlyLoading: false + property real modelLoadingPercentage: 0.0 + property bool trySwitchContextInProgress: false + + function changeModel(index) { + comboBox.modelLoadingPercentage = 0.0; + comboBox.isCurrentlyLoading = true; + currentChat.stopGenerating() + currentChat.reset(); + currentChat.modelInfo = ModelList.modelInfo(comboBox.valueAt(index)) } - } - Connections { - target: window - function onCurrentChatChanged() { - comboBox.updateCurrentModelName(); + + Connections { + target: currentChat + function onModelLoadingPercentageChanged() { + comboBox.modelLoadingPercentage = currentChat.modelLoadingPercentage; + comboBox.isCurrentlyLoading = currentChat.modelLoadingPercentage !== 0.0 + && currentChat.modelLoadingPercentage !== 1.0; + } + function onTrySwitchContextOfLoadedModelAttempted() { + comboBox.trySwitchContextInProgress = true; + } + function onTrySwitchContextOfLoadedModelCompleted() { + comboBox.trySwitchContextInProgress = false; + } + } + Connections { + target: switchModelDialog + function onAccepted() { + comboBox.changeModel(switchModelDialog.index) + } + } + + background: ProgressBar { + id: modelProgress + value: comboBox.modelLoadingPercentage + background: Rectangle { + color: theme.mainComboBackground + radius: 10 + } + contentItem: Item { + Rectangle { + visible: comboBox.isCurrentlyLoading + anchors.bottom: parent.bottom + width: modelProgress.visualPosition * parent.width + height: 10 + radius: 2 + color: theme.progressForeground + } + } } - } - background: Rectangle { - color: theme.mainComboBackground - radius: 10 - } - contentItem: Text { - anchors.horizontalCenter: parent.horizontalCenter - leftPadding: 10 - rightPadding: 20 - text: currentChat.modelLoadingError !== "" - ? qsTr("Model loading error...") - : comboBox.currentModelName - font.pixelSize: theme.fontSizeLarger - color: theme.white - verticalAlignment: Text.AlignVCenter - horizontalAlignment: Text.AlignHCenter - elide: Text.ElideRight - } - delegate: ItemDelegate { - width: comboBox.width contentItem: Text { - text: name - color: theme.textColor - font: comboBox.font - elide: Text.ElideRight + anchors.horizontalCenter: parent.horizontalCenter + leftPadding: 10 + rightPadding: 20 + text: { + if (currentChat.modelLoadingError !== "") + return qsTr("Model loading error...") + if (comboBox.trySwitchContextInProgress) + return qsTr("Switching context...") + if (currentModelName() === "") + return qsTr("Choose a model...") + if (currentChat.modelLoadingPercentage === 0.0) + return qsTr("Reload \u00B7 ") + currentModelName() + if (comboBox.isCurrentlyLoading) + return qsTr("Loading \u00B7 ") + currentModelName() + return currentModelName() + } + font.pixelSize: theme.fontSizeLarger + color: theme.white verticalAlignment: Text.AlignVCenter + horizontalAlignment: Text.AlignHCenter + elide: Text.ElideRight + } + delegate: ItemDelegate { + id: comboItemDelegate + width: comboBox.width + contentItem: Text { + text: name + color: theme.textColor + font: comboBox.font + elide: Text.ElideRight + verticalAlignment: Text.AlignVCenter + } + background: Rectangle { + color: (index % 2 === 0 ? theme.darkContrast : theme.lightContrast) + border.width: highlighted + border.color: theme.accentColor + } + highlighted: comboBox.highlightedIndex === index } - background: Rectangle { - color: (index % 2 === 0 ? 
theme.darkContrast : theme.lightContrast) - border.width: highlighted - border.color: theme.accentColor + Accessible.role: Accessible.ComboBox + Accessible.name: currentModelName() + Accessible.description: qsTr("The top item is the current model") + onActivated: function (index) { + var newInfo = ModelList.modelInfo(comboBox.valueAt(index)); + if (currentModelName() !== "" + && newInfo !== currentChat.modelInfo + && chatModel.count !== 0) { + switchModelDialog.index = index; + switchModelDialog.open(); + } else { + comboBox.changeModel(index); + } } - highlighted: comboBox.highlightedIndex === index - } - Accessible.role: Accessible.ComboBox - Accessible.name: comboBox.currentModelName - Accessible.description: qsTr("The top item is the current model") - onActivated: function (index) { - currentChat.stopGenerating() - currentChat.reset(); - currentChat.modelInfo = ModelList.modelInfo(comboBox.valueAt(index)) - } - } - } - Item { - anchors.centerIn: parent - visible: ModelList.installedModels.count - && !currentChat.isModelLoaded - && currentChat.modelLoadingError === "" - && !currentChat.isServer - width: childrenRect.width - height: childrenRect.height - Row { - spacing: 5 - MyBusyIndicator { - anchors.verticalCenter: parent.verticalCenter - running: parent.visible - Accessible.role: Accessible.Animation - Accessible.name: qsTr("Busy indicator") - Accessible.description: qsTr("loading model...") - } + MyMiniButton { + id: ejectButton + visible: currentChat.isModelLoaded + z: 500 + anchors.right: parent.right + anchors.rightMargin: 50 + anchors.verticalCenter: parent.verticalCenter + source: "qrc:/gpt4all/icons/eject.svg" + backgroundColor: theme.gray300 + backgroundColorHovered: theme.iconBackgroundLight + onClicked: { + currentChat.forceUnloadModel(); + } + ToolTip.text: qsTr("Eject the currently loaded model") + ToolTip.visible: hovered + } - Label { - anchors.verticalCenter: parent.verticalCenter - text: qsTr("Loading model...") - font.pixelSize: theme.fontSizeLarge - color: theme.oppositeTextColor + MyMiniButton { + id: reloadButton + visible: currentChat.modelLoadingError === "" + && !comboBox.trySwitchContextInProgress + && (currentChat.isModelLoaded || currentModelName() !== "") + z: 500 + anchors.right: ejectButton.visible ? ejectButton.left : parent.right + anchors.rightMargin: ejectButton.visible ? 10 : 50 + anchors.verticalCenter: parent.verticalCenter + source: "qrc:/gpt4all/icons/regenerate.svg" + backgroundColor: theme.gray300 + backgroundColorHovered: theme.iconBackgroundLight + onClicked: { + if (currentChat.isModelLoaded) + currentChat.forceReloadModel(); + else + currentChat.reloadModel(); + } + ToolTip.text: qsTr("Reload the currently loaded model") + ToolTip.visible: hovered + } } } } @@ -790,9 +874,9 @@ Window { Rectangle { id: homePage - color: "transparent"//theme.green200 + color: "transparent" anchors.fill: parent - visible: (ModelList.installedModels.count === 0 || chatModel.count === 0) && !currentChat.isServer + visible: !currentChat.isModelLoaded && (ModelList.installedModels.count === 0 || currentModelName() === "") && !currentChat.isServer ColumnLayout { anchors.centerIn: parent @@ -1138,50 +1222,84 @@ Window { } } - MyButton { - id: myButton - visible: chatModel.count && !currentChat.isServer - textColor: theme.textColor - Image { - anchors.verticalCenter: parent.verticalCenter - anchors.left: parent.left - anchors.leftMargin: 15 - source: currentChat.responseInProgress ? 
"qrc:/gpt4all/icons/stop_generating.svg" : "qrc:/gpt4all/icons/regenerate.svg" - } - leftPadding: 50 - onClicked: { - var index = Math.max(0, chatModel.count - 1); - var listElement = chatModel.get(index); - - if (currentChat.responseInProgress) { - listElement.stopped = true - currentChat.stopGenerating() - } else { - currentChat.regenerateResponse() - if (chatModel.count) { - if (listElement.name === qsTr("Response: ")) { - chatModel.updateCurrentResponse(index, true); - chatModel.updateStopped(index, false); - chatModel.updateThumbsUpState(index, false); - chatModel.updateThumbsDownState(index, false); - chatModel.updateNewResponse(index, ""); - currentChat.prompt(listElement.prompt) + RowLayout { + anchors.bottom: textInputView.top + anchors.horizontalCenter: textInputView.horizontalCenter + anchors.bottomMargin: 20 + spacing: 10 + MyButton { + textColor: theme.textColor + visible: chatModel.count && !currentChat.isServer && currentChat.isModelLoaded + Image { + anchors.verticalCenter: parent.verticalCenter + anchors.left: parent.left + anchors.leftMargin: 15 + source: currentChat.responseInProgress ? "qrc:/gpt4all/icons/stop_generating.svg" : "qrc:/gpt4all/icons/regenerate.svg" + } + leftPadding: 50 + onClicked: { + var index = Math.max(0, chatModel.count - 1); + var listElement = chatModel.get(index); + + if (currentChat.responseInProgress) { + listElement.stopped = true + currentChat.stopGenerating() + } else { + currentChat.regenerateResponse() + if (chatModel.count) { + if (listElement.name === qsTr("Response: ")) { + chatModel.updateCurrentResponse(index, true); + chatModel.updateStopped(index, false); + chatModel.updateThumbsUpState(index, false); + chatModel.updateThumbsDownState(index, false); + chatModel.updateNewResponse(index, ""); + currentChat.prompt(listElement.prompt) + } } } } + + borderWidth: 1 + backgroundColor: theme.conversationButtonBackground + backgroundColorHovered: theme.conversationButtonBackgroundHovered + backgroundRadius: 5 + padding: 15 + topPadding: 4 + bottomPadding: 4 + text: currentChat.responseInProgress ? qsTr("Stop generating") : qsTr("Regenerate response") + fontPixelSize: theme.fontSizeSmaller + Accessible.description: qsTr("Controls generation of the response") } - background: Rectangle { - border.color: theme.conversationButtonBorder - border.width: 2 - radius: 10 - color: myButton.hovered ? theme.conversationButtonBackgroundHovered : theme.conversationButtonBackground + + MyButton { + textColor: theme.textColor + visible: chatModel.count + && !currentChat.isServer + && !currentChat.isModelLoaded + && currentChat.modelLoadingPercentage === 0.0 + && currentChat.modelInfo.name !== "" + Image { + anchors.verticalCenter: parent.verticalCenter + anchors.left: parent.left + anchors.leftMargin: 15 + source: "qrc:/gpt4all/icons/regenerate.svg" + } + leftPadding: 50 + onClicked: { + currentChat.reloadModel(); + } + + borderWidth: 1 + backgroundColor: theme.conversationButtonBackground + backgroundColorHovered: theme.conversationButtonBackgroundHovered + backgroundRadius: 5 + padding: 15 + topPadding: 4 + bottomPadding: 4 + text: qsTr("Reload \u00B7 ") + currentChat.modelInfo.name + fontPixelSize: theme.fontSizeSmaller + Accessible.description: qsTr("Reloads the model") } - anchors.bottom: textInputView.top - anchors.horizontalCenter: textInputView.horizontalCenter - anchors.bottomMargin: 20 - padding: 15 - text: currentChat.responseInProgress ? 
qsTr("Stop generating") : qsTr("Regenerate response") - Accessible.description: qsTr("Controls generation of the response") } Text { @@ -1224,7 +1342,7 @@ Window { rightPadding: 40 enabled: currentChat.isModelLoaded && !currentChat.isServer font.pixelSize: theme.fontSizeLarger - placeholderText: qsTr("Send a message...") + placeholderText: currentChat.isModelLoaded ? qsTr("Send a message...") : qsTr("Load a model to continue...") Accessible.role: Accessible.EditableText Accessible.name: placeholderText Accessible.description: qsTr("Send messages/prompts to the model") diff --git a/gpt4all-chat/qml/MyButton.qml b/gpt4all-chat/qml/MyButton.qml index d79c275b1e1b..6f14f9d37258 100644 --- a/gpt4all-chat/qml/MyButton.qml +++ b/gpt4all-chat/qml/MyButton.qml @@ -13,9 +13,10 @@ Button { property color mutedTextColor: theme.oppositeMutedTextColor property color backgroundColor: theme.buttonBackground property color backgroundColorHovered: theme.buttonBackgroundHovered + property real backgroundRadius: 10 property real borderWidth: MySettings.chatTheme === "LegacyDark" ? 1 : 0 property color borderColor: theme.buttonBorder - property real fontPixelSize: theme.fontSizeLarge + property real fontPixelSize: theme.fontSizeLarge contentItem: Text { text: myButton.text horizontalAlignment: Text.AlignHCenter @@ -25,7 +26,7 @@ Button { Accessible.name: text } background: Rectangle { - radius: 10 + radius: myButton.backgroundRadius border.width: myButton.borderWidth border.color: myButton.borderColor color: myButton.hovered ? backgroundColorHovered : backgroundColor diff --git a/gpt4all-chat/qml/MyMiniButton.qml b/gpt4all-chat/qml/MyMiniButton.qml new file mode 100644 index 000000000000..d5e5571aa420 --- /dev/null +++ b/gpt4all-chat/qml/MyMiniButton.qml @@ -0,0 +1,47 @@ +import QtCore +import QtQuick +import QtQuick.Controls +import QtQuick.Controls.Basic +import Qt5Compat.GraphicalEffects + +Button { + id: myButton + padding: 0 + property color backgroundColor: theme.iconBackgroundDark + property color backgroundColorHovered: theme.iconBackgroundHovered + property alias source: image.source + property alias fillMode: image.fillMode + width: 30 + height: 30 + contentItem: Text { + text: myButton.text + horizontalAlignment: Text.AlignHCenter + color: myButton.enabled ? theme.textColor : theme.mutedTextColor + font.pixelSize: theme.fontSizeLarge + Accessible.role: Accessible.Button + Accessible.name: text + } + + background: Item { + anchors.fill: parent + Rectangle { + anchors.fill: parent + color: "transparent" + } + Image { + id: image + anchors.centerIn: parent + mipmap: true + width: 20 + height: 20 + } + ColorOverlay { + anchors.fill: image + source: image + color: myButton.hovered ? 
backgroundColorHovered : backgroundColor + } + } + Accessible.role: Accessible.Button + Accessible.name: text + ToolTip.delay: Qt.styleHints.mousePressAndHoldInterval +} diff --git a/gpt4all-chat/qml/SwitchModelDialog.qml b/gpt4all-chat/qml/SwitchModelDialog.qml new file mode 100644 index 000000000000..54dfbe60ac02 --- /dev/null +++ b/gpt4all-chat/qml/SwitchModelDialog.qml @@ -0,0 +1,44 @@ +import QtCore +import QtQuick +import QtQuick.Controls +import QtQuick.Controls.Basic +import QtQuick.Layouts +import llm +import mysettings + +MyDialog { + id: switchModelDialog + anchors.centerIn: parent + modal: true + padding: 20 + property int index: -1 + + Theme { + id: theme + } + + Column { + id: column + spacing: 20 + } + + footer: DialogButtonBox { + id: dialogBox + padding: 20 + alignment: Qt.AlignRight + spacing: 10 + MySettingsButton { + text: qsTr("Continue") + Accessible.description: qsTr("Continue with model loading") + DialogButtonBox.buttonRole: DialogButtonBox.AcceptRole + } + MySettingsButton { + text: qsTr("Cancel") + Accessible.description: qsTr("Cancel") + DialogButtonBox.buttonRole: DialogButtonBox.RejectRole + } + background: Rectangle { + color: "transparent" + } + } +} diff --git a/gpt4all-chat/qml/Theme.qml b/gpt4all-chat/qml/Theme.qml index 49f8343cbc82..2b8c9733ebfe 100644 --- a/gpt4all-chat/qml/Theme.qml +++ b/gpt4all-chat/qml/Theme.qml @@ -555,6 +555,7 @@ QtObject { property real fontSizeFixedSmall: 16 property real fontSize: Qt.application.font.pixelSize + property real fontSizeSmaller: fontSizeLarge - 4 property real fontSizeSmall: fontSizeLarge - 2 property real fontSizeLarge: MySettings.fontSize === "Small" ? fontSize : MySettings.fontSize === "Medium" ? From ed0f93977da3d5e3b92dc5516d913443b0702acb Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Mon, 19 Feb 2024 10:37:03 -0500 Subject: [PATCH 02/17] Fixes for issues identified in review. Signed-off-by: Adam Treat --- gpt4all-chat/main.qml | 55 +++++++++++++------------- gpt4all-chat/qml/ModelSettings.qml | 6 +-- gpt4all-chat/qml/SwitchModelDialog.qml | 8 ++-- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index 66104e37e754..a12f2666521a 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -130,6 +130,10 @@ Window { return ModelList.modelInfo(currentChat.modelInfo.id).name; } + property bool isCurrentlyLoading: false + property real modelLoadingPercentage: 0.0 + property bool trySwitchContextInProgress: false + PopupDialog { id: errorCompatHardware anchors.centerIn: parent @@ -289,8 +293,6 @@ Window { SwitchModelDialog { id: switchModelDialog anchors.centerIn: parent - width: Math.min(1024, window.width - (window.width * .2)) - height: Math.min(600, window.height - (window.height * .2)) Item { Accessible.role: Accessible.Dialog Accessible.name: qsTr("Switch model dialog") @@ -309,8 +311,6 @@ Window { anchors.centerIn: parent height: childrenRect.height visible: true - || currentChat.modelLoadingError !== "" - || currentChat.isServer Label { id: modelLabel @@ -337,18 +337,17 @@ Window { Layout.fillWidth: true Layout.fillHeight: true implicitWidth: 575 - width: window.width >= 750 ? implicitWidth : implicitWidth - ((750 - window.width)) + width: window.width >= 750 ? 
implicitWidth : implicitWidth - (750 - window.width) enabled: !currentChat.isServer + && !window.trySwitchContextInProgress + && !window.isCurrentlyLoading model: ModelList.installedModels valueRole: "id" textRole: "name" - property bool isCurrentlyLoading: false - property real modelLoadingPercentage: 0.0 - property bool trySwitchContextInProgress: false function changeModel(index) { - comboBox.modelLoadingPercentage = 0.0; - comboBox.isCurrentlyLoading = true; + window.modelLoadingPercentage = 0.0; + window.isCurrentlyLoading = true; currentChat.stopGenerating() currentChat.reset(); currentChat.modelInfo = ModelList.modelInfo(comboBox.valueAt(index)) @@ -357,15 +356,15 @@ Window { Connections { target: currentChat function onModelLoadingPercentageChanged() { - comboBox.modelLoadingPercentage = currentChat.modelLoadingPercentage; - comboBox.isCurrentlyLoading = currentChat.modelLoadingPercentage !== 0.0 + window.modelLoadingPercentage = currentChat.modelLoadingPercentage; + window.isCurrentlyLoading = currentChat.modelLoadingPercentage !== 0.0 && currentChat.modelLoadingPercentage !== 1.0; } function onTrySwitchContextOfLoadedModelAttempted() { - comboBox.trySwitchContextInProgress = true; + window.trySwitchContextInProgress = true; } function onTrySwitchContextOfLoadedModelCompleted() { - comboBox.trySwitchContextInProgress = false; + window.trySwitchContextInProgress = false; } } Connections { @@ -377,14 +376,14 @@ Window { background: ProgressBar { id: modelProgress - value: comboBox.modelLoadingPercentage + value: window.modelLoadingPercentage background: Rectangle { color: theme.mainComboBackground radius: 10 } contentItem: Item { Rectangle { - visible: comboBox.isCurrentlyLoading + visible: window.isCurrentlyLoading anchors.bottom: parent.bottom width: modelProgress.visualPosition * parent.width height: 10 @@ -400,13 +399,13 @@ Window { text: { if (currentChat.modelLoadingError !== "") return qsTr("Model loading error...") - if (comboBox.trySwitchContextInProgress) + if (window.trySwitchContextInProgress) return qsTr("Switching context...") if (currentModelName() === "") return qsTr("Choose a model...") if (currentChat.modelLoadingPercentage === 0.0) return qsTr("Reload \u00B7 ") + currentModelName() - if (comboBox.isCurrentlyLoading) + if (window.isCurrentlyLoading) return qsTr("Loading \u00B7 ") + currentModelName() return currentModelName() } @@ -468,7 +467,8 @@ Window { MyMiniButton { id: reloadButton visible: currentChat.modelLoadingError === "" - && !comboBox.trySwitchContextInProgress + && !window.trySwitchContextInProgress + && !window.isCurrentlyLoading && (currentChat.isModelLoaded || currentModelName() !== "") z: 500 anchors.right: ejectButton.visible ? ejectButton.left : parent.right @@ -1264,8 +1264,8 @@ Window { backgroundColorHovered: theme.conversationButtonBackgroundHovered backgroundRadius: 5 padding: 15 - topPadding: 4 - bottomPadding: 4 + topPadding: 8 + bottomPadding: 8 text: currentChat.responseInProgress ? 
qsTr("Stop generating") : qsTr("Regenerate response") fontPixelSize: theme.fontSizeSmaller Accessible.description: qsTr("Controls generation of the response") @@ -1273,11 +1273,12 @@ Window { MyButton { textColor: theme.textColor - visible: chatModel.count - && !currentChat.isServer + visible: !currentChat.isServer && !currentChat.isModelLoaded - && currentChat.modelLoadingPercentage === 0.0 - && currentChat.modelInfo.name !== "" + && !window.trySwitchContextInProgress + && !window.isCurrentlyLoading + && currentModelName() !== "" + Image { anchors.verticalCenter: parent.verticalCenter anchors.left: parent.left @@ -1294,8 +1295,8 @@ Window { backgroundColorHovered: theme.conversationButtonBackgroundHovered backgroundRadius: 5 padding: 15 - topPadding: 4 - bottomPadding: 4 + topPadding: 8 + bottomPadding: 8 text: qsTr("Reload \u00B7 ") + currentChat.modelInfo.name fontPixelSize: theme.fontSizeSmaller Accessible.description: qsTr("Reloads the model") diff --git a/gpt4all-chat/qml/ModelSettings.qml b/gpt4all-chat/qml/ModelSettings.qml index ce2f51570643..d338dc15bd92 100644 --- a/gpt4all-chat/qml/ModelSettings.qml +++ b/gpt4all-chat/qml/ModelSettings.qml @@ -328,7 +328,7 @@ MySettingsTab { text: root.currentModelInfo.contextLength font.pixelSize: theme.fontSizeLarge color: theme.textColor - ToolTip.text: qsTr("Maximum combined prompt/response tokens before information is lost.\nUsing more context than the model was trained on will yield poor results.\nNOTE: Does not take effect until you RESTART GPT4All or SWITCH MODELS.") + ToolTip.text: qsTr("Maximum combined prompt/response tokens before information is lost.\nUsing more context than the model was trained on will yield poor results.\nNOTE: Does not take effect until you reload the model.") ToolTip.visible: hovered Layout.row: 0 Layout.column: 1 @@ -692,7 +692,7 @@ MySettingsTab { text: root.currentModelInfo.gpuLayers font.pixelSize: theme.fontSizeLarge color: theme.textColor - ToolTip.text: qsTr("How many GPU layers to load into VRAM. Decrease this if GPT4All runs out of VRAM while loading this model.\nLower values increase CPU load and RAM usage, and make inference slower.\nNOTE: Does not take effect until you RESTART GPT4All or SWITCH MODELS.") + ToolTip.text: qsTr("How many GPU layers to load into VRAM. Decrease this if GPT4All runs out of VRAM while loading this model.\nLower values increase CPU load and RAM usage, and make inference slower.\nNOTE: Does not take effect until you reload the model.") ToolTip.visible: hovered Layout.row: 4 Layout.column: 1 @@ -705,7 +705,7 @@ MySettingsTab { Connections { target: root function onCurrentModelInfoChanged() { - if (root.currentModelInfo.gpuLayers == 100) { + if (root.currentModelInfo.gpuLayers === 100) { gpuLayersField.text = root.currentModelInfo.maxGpuLayers } else { gpuLayersField.text = root.currentModelInfo.gpuLayers diff --git a/gpt4all-chat/qml/SwitchModelDialog.qml b/gpt4all-chat/qml/SwitchModelDialog.qml index 54dfbe60ac02..f0ca43abbc24 100644 --- a/gpt4all-chat/qml/SwitchModelDialog.qml +++ b/gpt4all-chat/qml/SwitchModelDialog.qml @@ -17,9 +17,11 @@ MyDialog { id: theme } - Column { - id: column - spacing: 20 + contentItem: Text { + textFormat: Text.StyledText + text: qsTr("Warning: changing the model will erase the current conversation. 
Do you wish to continue?") + color: theme.textColor + font.pixelSize: theme.fontSizeLarge } footer: DialogButtonBox { From fbf5e5e7326c792355d51121fae9767388a17671 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Tue, 20 Feb 2024 09:27:28 -0500 Subject: [PATCH 03/17] Increase padding for elided text in combo. Signed-off-by: Adam Treat --- gpt4all-chat/main.qml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index a12f2666521a..337a14b9f5ed 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -395,7 +395,13 @@ Window { contentItem: Text { anchors.horizontalCenter: parent.horizontalCenter leftPadding: 10 - rightPadding: 20 + rightPadding: { + if (ejectButton.visible && reloadButton) + return 105; + if (reloadButton.visible) + return 65 + return 25 + } text: { if (currentChat.modelLoadingError !== "") return qsTr("Model loading error...") From ad34c2bdd40a8226f82b9637f17119d528cde838 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Tue, 20 Feb 2024 12:05:13 -0500 Subject: [PATCH 04/17] Don't erase context when reloading model by selection. Signed-off-by: Adam Treat --- gpt4all-chat/main.qml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index 337a14b9f5ed..be1ec94e6b85 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -443,9 +443,9 @@ Window { Accessible.description: qsTr("The top item is the current model") onActivated: function (index) { var newInfo = ModelList.modelInfo(comboBox.valueAt(index)); - if (currentModelName() !== "" - && newInfo !== currentChat.modelInfo - && chatModel.count !== 0) { + if (newInfo === currentChat.modelInfo) { + currentChat.reloadModel(); + } else if (currentModelName() !== "" && chatModel.count !== 0) { switchModelDialog.index = index; switchModelDialog.open(); } else { From 67099f80ba3fcd68c32a215b76c5d3866d142bc0 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 21 Feb 2024 09:54:27 -0500 Subject: [PATCH 05/17] Add comment to make this clear. Signed-off-by: Adam Treat --- gpt4all-chat/chat.cpp | 2 +- gpt4all-chat/chatllm.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gpt4all-chat/chat.cpp b/gpt4all-chat/chat.cpp index 8730adbcee05..62c33a1a6c29 100644 --- a/gpt4all-chat/chat.cpp +++ b/gpt4all-chat/chat.cpp @@ -243,7 +243,7 @@ void Chat::setModelInfo(const ModelInfo &modelInfo) if (m_modelInfo == modelInfo && isModelLoaded()) return; - m_modelLoadingPercentage = std::numeric_limits::min(); + m_modelLoadingPercentage = std::numeric_limits::min(); // small non-zero positive value emit isModelLoadedChanged(); m_modelLoadingError = QString(); emit modelLoadingErrorChanged(); diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index 4b456e3464a0..bf3f6253aab0 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -222,7 +222,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) #endif delete m_llModelInfo.model; m_llModelInfo.model = nullptr; - emit modelLoadingPercentageChanged(std::numeric_limits::min()); + emit modelLoadingPercentageChanged(std::numeric_limits::min()); // small non-zero positive value } else if (!m_isServer) { // This is a blocking call that tries to retrieve the model we need from the model store. // If it succeeds, then we just have to restore state. 
If the store has never had a model From b0c471aed8acd628e5b987cdf2540871301939ae Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 21 Feb 2024 10:49:13 -0500 Subject: [PATCH 06/17] Make the reload/regenerate buttons a little bit larger font. Signed-off-by: Adam Treat --- gpt4all-chat/main.qml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index be1ec94e6b85..b3392fbd757c 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -1273,7 +1273,7 @@ Window { topPadding: 8 bottomPadding: 8 text: currentChat.responseInProgress ? qsTr("Stop generating") : qsTr("Regenerate response") - fontPixelSize: theme.fontSizeSmaller + fontPixelSize: theme.fontSizeSmall Accessible.description: qsTr("Controls generation of the response") } @@ -1304,7 +1304,7 @@ Window { topPadding: 8 bottomPadding: 8 text: qsTr("Reload \u00B7 ") + currentChat.modelInfo.name - fontPixelSize: theme.fontSizeSmaller + fontPixelSize: theme.fontSizeSmall Accessible.description: qsTr("Reloads the model") } } From fa0a2129dcca6fe5b61f761c5a41fa38cfd4a871 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 21 Feb 2024 11:06:57 -0500 Subject: [PATCH 07/17] Don't try and detect model load error on startup. Signed-off-by: Adam Treat --- gpt4all-chat/chatllm.cpp | 14 -------------- gpt4all-chat/mysettings.cpp | 21 --------------------- gpt4all-chat/mysettings.h | 2 -- 3 files changed, 37 deletions(-) diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index bf3f6253aab0..750e85485b41 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -274,16 +274,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) // Store the file info in the modelInfo in case we have an error loading m_llModelInfo.fileInfo = fileInfo; - // Check if we've previously tried to load this file and failed/crashed - if (MySettings::globalInstance()->attemptModelLoad() == filePath) { - MySettings::globalInstance()->setAttemptModelLoad(QString()); // clear the flag - if (!m_isServer) - LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store - m_llModelInfo = LLModelInfo(); - emit modelLoadingError(QString("Previous attempt to load model resulted in crash for `%1` most likely due to insufficient memory. You should either remove this model or decrease your system RAM usage by closing other applications.").arg(modelInfo.filename())); - return false; - } - if (fileInfo.exists()) { if (isChatGPT) { QString apiKey; @@ -319,9 +309,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) return m_shouldBeLoaded; }); - // Update the settings that a model is being loaded and update the device list - MySettings::globalInstance()->setAttemptModelLoad(filePath); - // Pick the best match for the device QString actualDevice = m_llModelInfo.model->implementation().buildVariant() == "metal" ? "Metal" : "CPU"; const QString requestedDevice = MySettings::globalInstance()->device(); @@ -373,7 +360,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) emit reportFallbackReason("
model or quant has no GPU support"); } - MySettings::globalInstance()->setAttemptModelLoad(QString()); if (!success) { delete m_llModelInfo.model; m_llModelInfo.model = nullptr; diff --git a/gpt4all-chat/mysettings.cpp b/gpt4all-chat/mysettings.cpp index f9774bde2f57..9e5cdad0ce06 100644 --- a/gpt4all-chat/mysettings.cpp +++ b/gpt4all-chat/mysettings.cpp @@ -717,24 +717,3 @@ void MySettings::setNetworkUsageStatsActive(bool b) setting.sync(); emit networkUsageStatsActiveChanged(); } - -QString MySettings::attemptModelLoad() const -{ - QSettings setting; - setting.sync(); - return setting.value("attemptModelLoad", QString()).toString(); -} - -void MySettings::setAttemptModelLoad(const QString &modelFile) -{ - if (attemptModelLoad() == modelFile) - return; - - QSettings setting; - if (modelFile.isEmpty()) - setting.remove("attemptModelLoad"); - else - setting.setValue("attemptModelLoad", modelFile); - setting.sync(); - emit attemptModelLoadChanged(); -} diff --git a/gpt4all-chat/mysettings.h b/gpt4all-chat/mysettings.h index 4bfbef6b6390..c5019b91c8a4 100644 --- a/gpt4all-chat/mysettings.h +++ b/gpt4all-chat/mysettings.h @@ -110,8 +110,6 @@ class MySettings : public QObject bool networkUsageStatsActive() const; void setNetworkUsageStatsActive(bool b); - QString attemptModelLoad() const; - void setAttemptModelLoad(const QString &modelFile); QVector deviceList() const; void setDeviceList(const QVector &deviceList); From 896fc6fbb72c6a30bcf87a6b42cfb83d5a219d3a Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 21 Feb 2024 11:40:05 -0500 Subject: [PATCH 08/17] Save the window size for the user and reuse next load. Signed-off-by: Adam Treat --- gpt4all-chat/main.qml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index b3392fbd757c..7bacb6cb2b79 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -21,6 +21,14 @@ Window { visible: true title: qsTr("GPT4All v") + Qt.application.version + + Settings { + property alias x: window.x + property alias y: window.y + property alias width: window.width + property alias height: window.height + } + Theme { id: theme } From 7810b757c9120a93533fbcf56d169272b881d6bb Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 11:41:37 -0500 Subject: [PATCH 09/17] llamamodel: add gemma model support Signed-off-by: Jared Van Bortel --- gpt4all-backend/llama.cpp-mainline | 2 +- gpt4all-backend/llamamodel.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index 822a9c894eb3..7d4ced850548 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit 822a9c894eb3770c65f0b4a724aae34605c90029 +Subproject commit 7d4ced850548642b9a1740fa25ecdef249fbf47f diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index 0dd9de5d96ed..167d10ee5661 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -519,8 +519,8 @@ DLL_EXPORT bool magic_match(const char *fname) { bool valid = true; static const std::vector known_arches { - "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt", "orion", "persimmon", "phi2", "plamo", - "qwen", "qwen2", "refact", "stablelm", "starcoder" + "baichuan", "bloom", "codeshell", "falcon", "gemma", "gpt2", "llama", "mpt", "orion", "persimmon", "phi2", + "plamo", "qwen", "qwen2", "refact", "stablelm", "starcoder" }; if (std::find(known_arches.begin(), 
known_arches.end(), arch) == known_arches.end()) { From 32837fb3a0cc6074544661115dad09665b9704e7 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 14:05:26 -0500 Subject: [PATCH 10/17] models2.json: add gemma model Signed-off-by: Jared Van Bortel --- gpt4all-chat/metadata/models2.json | 48 ++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 95ef5ad84377..124f17315fc1 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -1,6 +1,22 @@ [ { "order": "a", + "md5sum": "6d1ca6e9533d177361fe2612a2c87474", + "name": "Gemma Instruct", + "filename": "gemma-7b-it.Q4_0.gguf", + "filesize": "4809316512", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Gemma", + "description": "A state-of-the-art open model from Google
  • Fast responses
  • Chat based model
  • Trained by Google
  • Licensed for commercial use
  • Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
", + "url": "https://gpt4all.io/models/gguf/gemma-7b-it.Q4_0.gguf", + "promptTemplate": "user\n%1\nmodel\n\n", + "systemPrompt": "" + }, + { + "order": "b", "md5sum": "48de9538c774188eb25a7e9ee024bbd3", "name": "Mistral OpenOrca", "filename": "mistral-7b-openorca.Q4_0.gguf", @@ -15,22 +31,6 @@ "promptTemplate": "<|im_start|>user\n%1<|im_end|><|im_start|>assistant\n", "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" }, - { - "order": "b", - "md5sum": "97463be739b50525df56d33b26b00852", - "name": "Mistral Instruct", - "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", - "filesize": "4108916384", - "requires": "2.5.0", - "ramrequired": "8", - "parameters": "7 billion", - "quant": "q4_0", - "type": "Mistral", - "systemPrompt": " ", - "description": "Best overall fast instruction following model
  • Fast responses
  • Trained by Mistral AI
  • Uncensored
  • Licensed for commercial use
", - "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", - "promptTemplate": "[INST] %1 [/INST]" - }, { "order": "c", "md5sum": "c4c78adf744d6a20f05c8751e3961b84", @@ -47,6 +47,22 @@ "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf", "promptTemplate": "### Instruction:\n%1\n### Response:\n" }, + { + "order": "d", + "md5sum": "97463be739b50525df56d33b26b00852", + "name": "Mistral Instruct", + "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", + "filesize": "4108916384", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Mistral", + "systemPrompt": " ", + "description": "Best overall fast instruction following model
  • Fast responses
  • Trained by Mistral AI
  • Uncensored
  • Licensed for commercial use
", + "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", + "promptTemplate": "[INST] %1 [/INST]" + }, { "order": "e", "md5sum": "00c8593ba57f5240f59662367b3ed4a5", From 4a8c6d7f9cc1aea0b75309cd4c542598836d2a9d Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 14:16:12 -0500 Subject: [PATCH 11/17] gemma: fix default prompt template Signed-off-by: Jared Van Bortel --- gpt4all-chat/metadata/models2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 124f17315fc1..91cffee819dd 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -12,7 +12,7 @@ "type": "Gemma", "description": "A state-of-the-art open model from Google
  • Fast responses
  • Chat based model
  • Trained by Google
  • Licensed for commercial use
  • Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
", "url": "https://gpt4all.io/models/gguf/gemma-7b-it.Q4_0.gguf", - "promptTemplate": "user\n%1\nmodel\n\n", + "promptTemplate": "user\n%1\nmodel\n", "systemPrompt": "" }, { From c13202a6f5f90094629cc6e214a2a4ccd91ccb74 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 14:43:55 -0500 Subject: [PATCH 12/17] models2.json: gemma requires a future GPT4All version Signed-off-by: Jared Van Bortel --- gpt4all-chat/metadata/models2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 91cffee819dd..5096cd032228 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -5,7 +5,7 @@ "name": "Gemma Instruct", "filename": "gemma-7b-it.Q4_0.gguf", "filesize": "4809316512", - "requires": "2.5.0", + "requires": "2.7.1", "ramrequired": "8", "parameters": "7 billion", "quant": "q4_0", From b8f5c74f40def7622a7e4b5aa86fadb473f39046 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 15:41:20 -0500 Subject: [PATCH 13/17] add models3.json for new templates (#1993) Signed-off-by: Jared Van Bortel --- gpt4all-chat/metadata/models3.json | 257 +++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 gpt4all-chat/metadata/models3.json diff --git a/gpt4all-chat/metadata/models3.json b/gpt4all-chat/metadata/models3.json new file mode 100644 index 000000000000..5e33ca0f88b0 --- /dev/null +++ b/gpt4all-chat/metadata/models3.json @@ -0,0 +1,257 @@ +[ + { + "order": "a", + "md5sum": "6d1ca6e9533d177361fe2612a2c87474", + "name": "Gemma Instruct", + "filename": "gemma-7b-it.Q4_0.gguf", + "filesize": "4809316512", + "requires": "2.7.1", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Gemma", + "description": "A state-of-the-art open model from Google
  • Fast responses
  • Chat based model
  • Trained by Google
  • Licensed for commercial use
  • Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
", + "url": "https://gpt4all.io/models/gguf/gemma-7b-it.Q4_0.gguf", + "promptTemplate": "user\n%1\nmodel\n%2\n", + "systemPrompt": "" + }, + { + "order": "b", + "md5sum": "f692417a22405d80573ac10cb0cd6c6a", + "name": "Mistral OpenOrca", + "filename": "mistral-7b-openorca.Q4_0.gguf2.gguf", + "filesize": "4108928128", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Mistral", + "description": "Best overall fast chat model
  • Fast responses
  • Chat based model
  • Trained by Mistral AI
  • Finetuned on OpenOrca dataset curated via Nomic Atlas
  • Licensed for commercial use
", + "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf", + "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n", + "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" + }, + { + "order": "c", + "md5sum": "c4c78adf744d6a20f05c8751e3961b84", + "name": "GPT4All Falcon", + "filename": "gpt4all-falcon-newbpe-q4_0.gguf", + "filesize": "4210994112", + "requires": "2.6.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Falcon", + "systemPrompt": " ", + "description": "Very fast model with good quality
  • Fastest responses
  • Instruction based
  • Trained by TII
  • Finetuned by Nomic AI
  • Licensed for commercial use
", + "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf", + "promptTemplate": "### Instruction:\n%1\n### Response:\n" + }, + { + "order": "d", + "md5sum": "97463be739b50525df56d33b26b00852", + "name": "Mistral Instruct", + "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", + "filesize": "4108916384", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Mistral", + "systemPrompt": " ", + "description": "Best overall fast instruction following model
  • Fast responses
  • Trained by Mistral AI
  • Uncensored
  • Licensed for commercial use
", + "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", + "promptTemplate": "[INST] %1 [/INST]" + }, + { + "order": "e", + "md5sum": "00c8593ba57f5240f59662367b3ed4a5", + "name": "Orca 2 (Medium)", + "filename": "orca-2-7b.Q4_0.gguf", + "filesize": "3825824192", + "requires": "2.5.2", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "LLaMA2", + "systemPrompt": " ", + "description": "
  • Instruction based
  • Trained by Microsoft
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/orca-2-7b.Q4_0.gguf" + }, + { + "order": "f", + "md5sum": "3c0d63c4689b9af7baa82469a6f51a19", + "name": "Orca 2 (Full)", + "filename": "orca-2-13b.Q4_0.gguf", + "filesize": "7365856064", + "requires": "2.5.2", + "ramrequired": "16", + "parameters": "13 billion", + "quant": "q4_0", + "type": "LLaMA2", + "systemPrompt": " ", + "description": "
  • Instruction based
  • Trained by Microsoft
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/orca-2-13b.Q4_0.gguf" + }, + { + "order": "g", + "md5sum": "5aff90007499bce5c64b1c0760c0b186", + "name": "Wizard v1.2", + "filename": "wizardlm-13b-v1.2.Q4_0.gguf", + "filesize": "7365834624", + "requires": "2.5.0", + "ramrequired": "16", + "parameters": "13 billion", + "quant": "q4_0", + "type": "LLaMA2", + "systemPrompt": " ", + "description": "Best overall larger model
  • Instruction based
  • Gives very long responses
  • Finetuned with only 1k of high-quality data
  • Trained by Microsoft and Peking University
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/wizardlm-13b-v1.2.Q4_0.gguf" + }, + { + "order": "h", + "md5sum": "3d12810391d04d1153b692626c0c6e16", + "name": "Hermes", + "filename": "nous-hermes-llama2-13b.Q4_0.gguf", + "filesize": "7366062080", + "requires": "2.5.0", + "ramrequired": "16", + "parameters": "13 billion", + "quant": "q4_0", + "type": "LLaMA2", + "systemPrompt": " ", + "description": "Extremely good model
  • Instruction based
  • Gives long responses
  • Curated with 300,000 uncensored instructions
  • Trained by Nous Research
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/nous-hermes-llama2-13b.Q4_0.gguf", + "promptTemplate": "### Instruction:\n%1\n### Response:\n" + }, + { + "order": "i", + "md5sum": "40388eb2f8d16bb5d08c96fdfaac6b2c", + "name": "Snoozy", + "filename": "gpt4all-13b-snoozy-q4_0.gguf", + "filesize": "7365834624", + "requires": "2.5.0", + "ramrequired": "16", + "parameters": "13 billion", + "quant": "q4_0", + "type": "LLaMA", + "systemPrompt": " ", + "description": "Very good overall model
  • Instruction based
  • Based on the same dataset as Groovy
  • Slower than Groovy, with higher quality responses
  • Trained by Nomic AI
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/gpt4all-13b-snoozy-q4_0.gguf" + }, + { + "order": "j", + "md5sum": "15dcb4d7ea6de322756449c11a0b7545", + "name": "MPT Chat", + "filename": "mpt-7b-chat-newbpe-q4_0.gguf", + "filesize": "3912373472", + "requires": "2.6.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "MPT", + "description": "Good model with novel architecture
  • Fast responses
  • Chat based
  • Trained by Mosaic ML
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/mpt-7b-chat-newbpe-q4_0.gguf", + "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n", + "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>" + }, + { + "order": "k", + "md5sum": "0e769317b90ac30d6e09486d61fefa26", + "name": "Mini Orca (Small)", + "filename": "orca-mini-3b-gguf2-q4_0.gguf", + "filesize": "1979946720", + "requires": "2.5.0", + "ramrequired": "4", + "parameters": "3 billion", + "quant": "q4_0", + "type": "OpenLLaMa", + "description": "Small version of new model with novel dataset
  • Instruction based
  • Explain tuned datasets
  • Orca Research Paper dataset construction approaches
  • Cannot be used commercially
", + "url": "https://gpt4all.io/models/gguf/orca-mini-3b-gguf2-q4_0.gguf", + "promptTemplate": "### User:\n%1\n### Response:\n", + "systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n" + }, + { + "order": "l", + "md5sum": "c232f17e09bca4b7ee0b5b1f4107c01e", + "disableGUI": "true", + "name": "Replit", + "filename": "replit-code-v1_5-3b-newbpe-q4_0.gguf", + "filesize": "1953055104", + "requires": "2.6.0", + "ramrequired": "4", + "parameters": "3 billion", + "quant": "q4_0", + "type": "Replit", + "systemPrompt": " ", + "promptTemplate": "%1", + "description": "Trained on subset of the Stack
  • Code completion based
  • Licensed for commercial use
  • WARNING: Not available for chat GUI
", + "url": "https://gpt4all.io/models/gguf/replit-code-v1_5-3b-newbpe-q4_0.gguf" + }, + { + "order": "m", + "md5sum": "70841751ccd95526d3dcfa829e11cd4c", + "disableGUI": "true", + "name": "Starcoder", + "filename": "starcoder-newbpe-q4_0.gguf", + "filesize": "8987411904", + "requires": "2.6.0", + "ramrequired": "4", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Starcoder", + "systemPrompt": " ", + "promptTemplate": "%1", + "description": "Trained on subset of the Stack
  • Code completion based
  • WARNING: Not available for chat GUI
", + "url": "https://gpt4all.io/models/gguf/starcoder-newbpe-q4_0.gguf" + }, + { + "order": "n", + "md5sum": "e973dd26f0ffa6e46783feaea8f08c83", + "disableGUI": "true", + "name": "Rift coder", + "filename": "rift-coder-v0-7b-q4_0.gguf", + "filesize": "3825903776", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "LLaMA", + "systemPrompt": " ", + "promptTemplate": "%1", + "description": "Trained on collection of Python and TypeScript
  • Code completion based
  • WARNING: Not available for chat GUI
  • ", + "url": "https://gpt4all.io/models/gguf/rift-coder-v0-7b-q4_0.gguf" + }, + { + "order": "o", + "md5sum": "e479e6f38b59afc51a470d1953a6bfc7", + "disableGUI": "true", + "name": "SBert", + "filename": "all-MiniLM-L6-v2-f16.gguf", + "filesize": "45887744", + "requires": "2.5.0", + "ramrequired": "1", + "parameters": "40 million", + "quant": "f16", + "type": "Bert", + "systemPrompt": " ", + "description": "LocalDocs text embeddings model
    • For use with LocalDocs feature
    • Used for retrieval augmented generation (RAG)", + "url": "https://gpt4all.io/models/gguf/all-MiniLM-L6-v2-f16.gguf" + }, + { + "order": "p", + "md5sum": "919de4dd6f25351bcb0223790db1932d", + "name": "EM German Mistral", + "filename": "em_german_mistral_v01.Q4_0.gguf", + "filesize": "4108916352", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Mistral", + "description": "Mistral-based model for German-language applications
      • Fast responses
      • Chat based model
      • Trained by ellamind
      • Finetuned on German instruction and chat data
      • Licensed for commercial use
      ", + "url": "https://huggingface.co/TheBloke/em_german_mistral_v01-GGUF/resolve/main/em_german_mistral_v01.Q4_0.gguf", + "promptTemplate": "USER: %1 ASSISTANT: ", + "systemPrompt": "Du bist ein hilfreicher Assistent. " + } +] From 4fc4d94be440c7991a5bafb87eef6e0fd54a2e13 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 15:45:32 -0500 Subject: [PATCH 14/17] fix chat-style prompt templates (#1970) Also use a new version of Mistral OpenOrca. Signed-off-by: Jared Van Bortel --- gpt4all-backend/bert.cpp | 4 +- gpt4all-backend/bert_impl.h | 7 +- gpt4all-backend/gptj.cpp | 4 +- gpt4all-backend/gptj/placeholder | 0 gpt4all-backend/gptj_impl.h | 7 +- gpt4all-backend/llama.cpp-mainline | 2 +- gpt4all-backend/llama/placeholder | 0 gpt4all-backend/llamamodel.cpp | 183 +++++++++++------- gpt4all-backend/llamamodel_impl.h | 14 +- gpt4all-backend/llmodel.h | 50 +++-- gpt4all-backend/llmodel_c.cpp | 18 +- gpt4all-backend/llmodel_c.h | 6 +- gpt4all-backend/llmodel_shared.cpp | 136 +++++++++++-- .../python/docs/gpt4all_python.md | 84 -------- gpt4all-bindings/python/gpt4all/_pyllmodel.py | 19 +- gpt4all-bindings/python/gpt4all/gpt4all.py | 61 +++--- gpt4all-bindings/python/setup.py | 2 +- gpt4all-chat/chatgpt.cpp | 9 +- gpt4all-chat/chatgpt.h | 50 ++++- gpt4all-chat/chatllm.cpp | 68 +++---- gpt4all-chat/metadata/models2.json | 10 +- gpt4all-chat/modellist.cpp | 2 +- 22 files changed, 429 insertions(+), 307 deletions(-) delete mode 100644 gpt4all-backend/gptj/placeholder delete mode 100644 gpt4all-backend/llama/placeholder diff --git a/gpt4all-backend/bert.cpp b/gpt4all-backend/bert.cpp index 342827e2da65..bad5a422a436 100644 --- a/gpt4all-backend/bert.cpp +++ b/gpt4all-backend/bert.cpp @@ -814,8 +814,10 @@ std::vector Bert::embedding(const std::string &text) return finalEmbeddings; } -std::vector Bert::tokenize(PromptContext &, const std::string &str) const +std::vector Bert::tokenize(PromptContext &ctx, const std::string &str, bool special) const { + (void)ctx; + (void)special; return ::bert_tokenize(d_ptr->ctx, str.c_str()); } diff --git a/gpt4all-backend/bert_impl.h b/gpt4all-backend/bert_impl.h index 072e9783217f..610cc2c9f2cf 100644 --- a/gpt4all-backend/bert_impl.h +++ b/gpt4all-backend/bert_impl.h @@ -33,12 +33,13 @@ class Bert : public LLModel { std::unique_ptr d_ptr; protected: - std::vector tokenize(PromptContext &, const std::string&) const override; + std::vector tokenize(PromptContext &ctx, const std::string &str, bool special) const override; Token sampleToken(PromptContext &ctx) const override; - std::string tokenToString(Token) const override; + std::string tokenToString(Token id) const override; bool evalTokens(PromptContext &ctx, const std::vector &tokens) const override; int32_t contextLength() const override; - const std::vector& endTokens() const override; + const std::vector &endTokens() const override; + bool shouldAddBOS() const override { return true; } }; #endif // BERT_H diff --git a/gpt4all-backend/gptj.cpp b/gpt4all-backend/gptj.cpp index 51a032f803f2..fcc4ae2a26e5 100644 --- a/gpt4all-backend/gptj.cpp +++ b/gpt4all-backend/gptj.cpp @@ -737,8 +737,10 @@ size_t GPTJ::restoreState(const uint8_t *src) return gptj_set_state_data(d_ptr->model, &d_ptr->rng, src); } -std::vector GPTJ::tokenize(PromptContext &, const std::string &str) const +std::vector GPTJ::tokenize(PromptContext &ctx, const std::string &str, bool special) const { + (void)ctx; + (void)special; return ::gpt_tokenize(d_ptr->vocab, str); } diff --git a/gpt4all-backend/gptj/placeholder 
b/gpt4all-backend/gptj/placeholder deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/gpt4all-backend/gptj_impl.h b/gpt4all-backend/gptj_impl.h index 01d5698f79f0..5d940af3dfc8 100644 --- a/gpt4all-backend/gptj_impl.h +++ b/gpt4all-backend/gptj_impl.h @@ -30,12 +30,13 @@ class GPTJ : public LLModel { GPTJPrivate *d_ptr; protected: - std::vector tokenize(PromptContext &, const std::string&) const override; + std::vector tokenize(PromptContext &ctx, const std::string &str, bool special) const override; Token sampleToken(PromptContext &ctx) const override; - std::string tokenToString(Token) const override; + std::string tokenToString(Token id) const override; bool evalTokens(PromptContext &ctx, const std::vector &tokens) const override; int32_t contextLength() const override; - const std::vector& endTokens() const override; + const std::vector &endTokens() const override; + bool shouldAddBOS() const override { return false; } }; #endif // GPTJ_H diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index 7d4ced850548..b61ee89fca20 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit 7d4ced850548642b9a1740fa25ecdef249fbf47f +Subproject commit b61ee89fca2038e9937317a794e28e08391b7888 diff --git a/gpt4all-backend/llama/placeholder b/gpt4all-backend/llama/placeholder deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index 167d10ee5661..e8d2ccbf2f62 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -6,38 +6,29 @@ #include #include #include -#include -#include -#include +#include #include -#if defined(_WIN32) && defined(_MSC_VER) - #define WIN32_LEAN_AND_MEAN - #ifndef NOMINMAX - #define NOMINMAX - #endif - #include - #include - #include -#else - #include -#endif +#include #include +#include +#include +#include #include #include +#include #include #include - #ifdef GGML_USE_KOMPUTE -#include "ggml-kompute.h" +#include #endif +using namespace std::string_literals; + // Maximum supported GGUF version static constexpr int GGUF_VER_MAX = 3; -namespace { -const char *modelType_ = "LLaMA"; -} +static const char * const modelType_ = "LLaMA"; static bool llama_verbose() { const char* var = getenv("GPT4ALL_VERBOSE_LLAMACPP"); @@ -96,6 +87,56 @@ static int llama_sample_top_p_top_k( return llama_sample_token(ctx, &candidates_p); } +std::string get_arch_name(gguf_context *ctx_gguf) { + std::string arch_name; + const int kid = gguf_find_key(ctx_gguf, "general.architecture"); + enum gguf_type ktype = gguf_get_kv_type(ctx_gguf, kid); + if (ktype != (GGUF_TYPE_STRING)) { + throw std::runtime_error("ERROR: Can't get general architecture from gguf file."); + } + return gguf_get_val_str(ctx_gguf, kid); +} + +static gguf_context *load_gguf(const char *fname) { + struct gguf_init_params params = { + /*.no_alloc = */ true, + /*.ctx = */ nullptr, + }; + gguf_context *ctx = gguf_init_from_file(fname, params); + if (!ctx) { + std::cerr << __func__ << ": gguf_init_from_file failed\n"; + return nullptr; + } + + int gguf_ver = gguf_get_version(ctx); + if (gguf_ver > GGUF_VER_MAX) { + std::cerr << __func__ << ": unsupported gguf version: " << gguf_ver << "\n"; + gguf_free(ctx); + return nullptr; + } + + return ctx; +} + +static int32_t get_arch_key_u32(std::string const &modelPath, std::string const &archKey) { + auto * ctx = load_gguf(modelPath.c_str()); + auto arch = get_arch_name(ctx); + + 
int32_t value = -1; + if (ctx) { + auto key = arch + "." + archKey; + int keyidx = gguf_find_key(ctx, key.c_str()); + if (keyidx != -1) { + value = gguf_get_val_u32(ctx, keyidx); + } else { + std::cerr << __func__ << ": " << key << "not found in " << modelPath << "\n"; + } + } + + gguf_free(ctx); + return value; +} + struct LLamaPrivate { const std::string modelPath; bool modelLoaded; @@ -148,6 +189,42 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl) return filesize + est_kvcache_size; } +bool LLamaModel::isModelBlacklisted(const std::string &modelPath) { + auto * ctx = load_gguf(modelPath.c_str()); + if (!ctx) { + std::cerr << __func__ << ": failed to load " << modelPath << "\n"; + return false; + } + + auto get_key = [ctx, &modelPath](const char *name) { + int keyidx = gguf_find_key(ctx, name); + if (keyidx == -1) { + throw std::logic_error(name + " not found in "s + modelPath); + } + return keyidx; + }; + + bool res = false; + try { + std::string name(gguf_get_val_str(ctx, get_key("general.name"))); + int token_idx = get_key("tokenizer.ggml.tokens"); + int n_vocab = gguf_get_arr_n(ctx, token_idx); + + // check for known bad models + if (name == "open-orca_mistral-7b-openorca" + && n_vocab == 32002 + && gguf_get_arr_str(ctx, token_idx, 32000) == ""s // should be <|im_end|> + ) { + res = true; + } + } catch (const std::logic_error &e) { + std::cerr << __func__ << ": " << e.what() << "\n"; + } + + gguf_free(ctx); + return res; +} + bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) { d_ptr->modelLoaded = false; @@ -290,12 +367,13 @@ size_t LLamaModel::restoreState(const uint8_t *src) return llama_set_state_data(d_ptr->ctx, const_cast(src)); } -std::vector LLamaModel::tokenize(PromptContext &ctx, const std::string &str) const +std::vector LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special) const { - const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos(d_ptr->model)); - std::vector fres(str.size()+4); - // TODO(cebtenzzre): we may want to use special=true here to process special tokens - auto fres_len = llama_tokenize(d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), useBOS, false); + const bool wantBOS = ctx.n_past == 0 && ctx.tokens.empty(); + const bool useBOS = wantBOS && shouldAddBOS(); + auto strCat = wantBOS && !special ? 
" " + str : str; // insert leading space ourselves, llama.cpp fork doesn't anymore + std::vector fres(strCat.size()+4); + auto fres_len = llama_tokenize(d_ptr->model, strCat.c_str(), strCat.length(), fres.data(), fres.size(), useBOS, special); fres.resize(fres_len); return fres; } @@ -349,55 +427,10 @@ const std::vector &LLamaModel::endTokens() const return d_ptr->end_tokens; } -std::string get_arch_name(gguf_context *ctx_gguf) { - std::string arch_name; - const int kid = gguf_find_key(ctx_gguf, "general.architecture"); - enum gguf_type ktype = gguf_get_kv_type(ctx_gguf, kid); - if (ktype != (GGUF_TYPE_STRING)) { - throw std::runtime_error("ERROR: Can't get general architecture from gguf file."); - } - return gguf_get_val_str(ctx_gguf, kid); -} - -static gguf_context *load_gguf(const char *fname, std::string &arch) { - struct gguf_init_params params = { - /*.no_alloc = */ true, - /*.ctx = */ nullptr, - }; - gguf_context *ctx = gguf_init_from_file(fname, params); - if (!ctx) { - std::cerr << __func__ << ": gguf_init_from_file failed\n"; - return nullptr; - } - - int gguf_ver = gguf_get_version(ctx); - if (gguf_ver > GGUF_VER_MAX) { - std::cerr << __func__ << ": unsupported gguf version: " << gguf_ver << "\n"; - gguf_free(ctx); - return nullptr; - } - - arch = get_arch_name(ctx); - return ctx; -} - -static int32_t get_arch_key_u32(std::string const &modelPath, std::string const &archKey) { - std::string arch; - auto * ctx = load_gguf(modelPath.c_str(), arch); - - int32_t value = -1; - if (ctx) { - auto key = arch + "." + archKey; - int keyidx = gguf_find_key(ctx, key.c_str()); - if (keyidx != -1) { - value = gguf_get_val_u32(ctx, keyidx); - } else { - std::cerr << __func__ << ": " << key << "not found in " << modelPath << "\n"; - } - } - - gguf_free(ctx); - return value; +bool LLamaModel::shouldAddBOS() const +{ + int add_bos = llama_add_bos_token(d_ptr->model); + return add_bos != -1 ? 
bool(add_bos) : llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_SPM; } int32_t LLamaModel::maxContextLength(std::string const &modelPath) const @@ -513,8 +546,8 @@ DLL_EXPORT const char *get_build_variant() { } DLL_EXPORT bool magic_match(const char *fname) { - std::string arch; - auto * ctx = load_gguf(fname, arch); + auto * ctx = load_gguf(fname); + auto arch = get_arch_name(ctx); bool valid = true; diff --git a/gpt4all-backend/llamamodel_impl.h b/gpt4all-backend/llamamodel_impl.h index 27eb580b07cf..15cbe1cdb928 100644 --- a/gpt4all-backend/llamamodel_impl.h +++ b/gpt4all-backend/llamamodel_impl.h @@ -19,6 +19,7 @@ class LLamaModel : public LLModel { bool supportsEmbedding() const override { return false; } bool supportsCompletion() const override { return true; } bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override; + bool isModelBlacklisted(const std::string &modelPath) override; bool isModelLoaded() const override; size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override; size_t stateSize() const override; @@ -27,7 +28,7 @@ class LLamaModel : public LLModel { void setThreadCount(int32_t n_threads) override; int32_t threadCount() const override; std::vector availableGPUDevices(size_t memoryRequired) const override; - bool initializeGPUDevice(size_t memoryRequired, const std::string& name) const override; + bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override; bool initializeGPUDevice(int device, std::string *unavail_reason) const override; bool hasGPUDevice() override; bool usingGPUDevice() override; @@ -36,12 +37,13 @@ class LLamaModel : public LLModel { std::unique_ptr d_ptr; protected: - std::vector tokenize(PromptContext &, const std::string&) const override; - std::string tokenToString(Token) const override; - Token sampleToken(PromptContext& ctx) const override; - bool evalTokens(PromptContext& ctx, const std::vector &tokens) const override; + std::vector tokenize(PromptContext &ctx, const std::string &str, bool special) const override; + std::string tokenToString(Token id) const override; + Token sampleToken(PromptContext &ctx) const override; + bool evalTokens(PromptContext &ctx, const std::vector &tokens) const override; int32_t contextLength() const override; - const std::vector& endTokens() const override; + const std::vector &endTokens() const override; + bool shouldAddBOS() const override; int32_t maxContextLength(std::string const &modelPath) const override; int32_t layerCount(std::string const &modelPath) const override; diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h index c3cc937c0f72..5ccbea08a119 100644 --- a/gpt4all-backend/llmodel.h +++ b/gpt4all-backend/llmodel.h @@ -29,23 +29,23 @@ class LLModel { class Implementation { public: - Implementation(Dlhandle&&); - Implementation(const Implementation&) = delete; - Implementation(Implementation&&); + Implementation(Dlhandle &&); + Implementation(const Implementation &) = delete; + Implementation(Implementation &&); ~Implementation(); std::string_view modelType() const { return m_modelType; } std::string_view buildVariant() const { return m_buildVariant; } - static bool isImplementation(const Dlhandle&); - static const std::vector& implementationList(); - static const Implementation *implementation(const char *fname, const std::string& buildVariant); + static bool isImplementation(const Dlhandle &dl); + static const std::vector &implementationList(); + static const Implementation *implementation(const char *fname, const 
std::string &buildVariant); static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto", int n_ctx = 2048); static std::vector availableGPUDevices(); static int32_t maxContextLength(const std::string &modelPath); static int32_t layerCount(const std::string &modelPath); - static void setImplementationsSearchPath(const std::string& path); - static const std::string& implementationsSearchPath(); + static void setImplementationsSearchPath(const std::string &path); + static const std::string &implementationsSearchPath(); private: static LLModel *constructDefaultLlama(); @@ -82,26 +82,30 @@ class LLModel { virtual bool supportsEmbedding() const = 0; virtual bool supportsCompletion() const = 0; virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0; + virtual bool isModelBlacklisted(const std::string &modelPath) { (void)modelPath; return false; }; virtual bool isModelLoaded() const = 0; virtual size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) = 0; virtual size_t stateSize() const { return 0; } - virtual size_t saveState(uint8_t */*dest*/) const { return 0; } - virtual size_t restoreState(const uint8_t */*src*/) { return 0; } + virtual size_t saveState(uint8_t *dest) const { (void)dest; return 0; } + virtual size_t restoreState(const uint8_t *src) { (void)src; return 0; } // This method requires the model to return true from supportsCompletion otherwise it will throw // an error virtual void prompt(const std::string &prompt, + const std::string &promptTemplate, std::function promptCallback, std::function responseCallback, std::function recalculateCallback, - PromptContext &ctx); + PromptContext &ctx, + bool special = false, + std::string *fakeReply = nullptr); virtual std::vector embedding(const std::string &text); - virtual void setThreadCount(int32_t /*n_threads*/) {} + virtual void setThreadCount(int32_t n_threads) { (void)n_threads; } virtual int32_t threadCount() const { return 1; } - const Implementation& implementation() const { + const Implementation &implementation() const { return *m_implementation; } @@ -110,7 +114,7 @@ class LLModel { return {}; } - virtual bool initializeGPUDevice(size_t memoryRequired, const std::string& name) const { + virtual bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const { (void)memoryRequired; (void)name; return false; @@ -132,12 +136,13 @@ class LLModel { protected: // These are pure virtual because subclasses need to implement as the default implementation of // 'prompt' above calls these functions - virtual std::vector tokenize(PromptContext &, const std::string&) const = 0; - virtual std::string tokenToString(Token) const = 0; + virtual std::vector tokenize(PromptContext &ctx, const std::string &str, bool special = false) const = 0; + virtual std::string tokenToString(Token id) const = 0; virtual Token sampleToken(PromptContext &ctx) const = 0; - virtual bool evalTokens(PromptContext &/*ctx*/, const std::vector& /*tokens*/) const = 0; + virtual bool evalTokens(PromptContext &ctx, const std::vector &tokens) const = 0; virtual int32_t contextLength() const = 0; - virtual const std::vector& endTokens() const = 0; + virtual const std::vector &endTokens() const = 0; + virtual bool shouldAddBOS() const = 0; virtual int32_t maxContextLength(std::string const &modelPath) const { @@ -166,6 +171,15 @@ class LLModel { return true; } + void decodePrompt(std::function promptCallback, + std::function responseCallback, + std::function recalculateCallback, + PromptContext 
&promptCtx, + std::vector embd_inp); + void generateResponse(std::function responseCallback, + std::function recalculateCallback, + PromptContext &promptCtx); + private: friend class LLMImplementation; }; diff --git a/gpt4all-backend/llmodel_c.cpp b/gpt4all-backend/llmodel_c.cpp index 8ba59b2b7a39..b6306a77d894 100644 --- a/gpt4all-backend/llmodel_c.cpp +++ b/gpt4all-backend/llmodel_c.cpp @@ -1,8 +1,9 @@ #include "llmodel_c.h" #include "llmodel.h" -#include #include +#include +#include #include struct LLModelWrapper { @@ -56,7 +57,14 @@ size_t llmodel_required_mem(llmodel_model model, const char *model_path, int n_c bool llmodel_loadModel(llmodel_model model, const char *model_path, int n_ctx, int ngl) { LLModelWrapper *wrapper = reinterpret_cast(model); - return wrapper->llModel->loadModel(model_path, n_ctx, ngl); + + std::string modelPath(model_path); + if (wrapper->llModel->isModelBlacklisted(modelPath)) { + size_t slash = modelPath.find_last_of("/\\"); + auto basename = slash == std::string::npos ? modelPath : modelPath.substr(slash + 1); + std::cerr << "warning: model '" << basename << "' is out-of-date, please check for an updated version\n"; + } + return wrapper->llModel->loadModel(modelPath, n_ctx, ngl); } bool llmodel_isModelLoaded(llmodel_model model) @@ -100,10 +108,12 @@ bool recalculate_wrapper(bool is_recalculating, void *user_data) { } void llmodel_prompt(llmodel_model model, const char *prompt, + const char *prompt_template, llmodel_prompt_callback prompt_callback, llmodel_response_callback response_callback, llmodel_recalculate_callback recalculate_callback, - llmodel_prompt_context *ctx) + llmodel_prompt_context *ctx, + bool special) { LLModelWrapper *wrapper = reinterpret_cast(model); @@ -131,7 +141,7 @@ void llmodel_prompt(llmodel_model model, const char *prompt, wrapper->promptContext.contextErase = ctx->context_erase; // Call the C++ prompt method - wrapper->llModel->prompt(prompt, prompt_func, response_func, recalc_func, wrapper->promptContext); + wrapper->llModel->prompt(prompt, prompt_template, prompt_func, response_func, recalc_func, wrapper->promptContext, special); // Update the C context by giving access to the wrappers raw pointers to std::vector data // which involves no copies diff --git a/gpt4all-backend/llmodel_c.h b/gpt4all-backend/llmodel_c.h index 50d35edac6dd..eac4ae9b9666 100644 --- a/gpt4all-backend/llmodel_c.h +++ b/gpt4all-backend/llmodel_c.h @@ -163,16 +163,20 @@ uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src); * Generate a response using the model. * @param model A pointer to the llmodel_model instance. * @param prompt A string representing the input prompt. + * @param prompt_template A string representing the input prompt template. * @param prompt_callback A callback function for handling the processing of prompt. * @param response_callback A callback function for handling the generated response. * @param recalculate_callback A callback function for handling recalculation requests. + * @param special True if special tokens in the prompt should be processed, false otherwise. * @param ctx A pointer to the llmodel_prompt_context structure. */ void llmodel_prompt(llmodel_model model, const char *prompt, + const char *prompt_template, llmodel_prompt_callback prompt_callback, llmodel_response_callback response_callback, llmodel_recalculate_callback recalculate_callback, - llmodel_prompt_context *ctx); + llmodel_prompt_context *ctx, + bool special); /** * Generate an embedding using the model. 
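
For context, here is a minimal sketch (illustrative only, not part of the diff) of how a caller might drive the reworked C++ prompt() entry point once this patch is applied, passing the new promptTemplate argument separately from the raw user text. The model filename, the GPU layer count, n_predict, and the question string are assumptions; the ChatML-style template is the Mistral OpenOrca one from models3.json above.

// Illustrative sketch against gpt4all-backend/llmodel.h as modified by this patch.
// Model file, GPU layer count, and sampling values below are assumptions.
#include "llmodel.h"

#include <cstdint>
#include <iostream>
#include <string>

int main()
{
    const std::string modelPath = "mistral-7b-openorca.Q4_0.gguf2.gguf"; // assumed local path
    LLModel *model = LLModel::Implementation::construct(modelPath);      // buildVariant defaults to "auto"
    if (!model || !model->loadModel(modelPath, /*n_ctx*/ 2048, /*ngl*/ 100))
        return 1;

    LLModel::PromptContext ctx;
    ctx.n_predict = 128; // other sampling fields keep their in-class defaults

    auto onPrompt   = [](int32_t /*tokenId*/) { return true; };           // keep ingesting the prompt
    auto onResponse = [](int32_t /*tokenId*/, const std::string &piece) { // stream the reply to stdout
        std::cout << piece << std::flush;
        return true;
    };
    auto onRecalc   = [](bool /*isRecalculating*/) { return true; };      // allow context recalculation

    // ChatML-style template from models3.json: %1 = user prompt, %2 = assistant reply.
    const std::string tmpl = "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n";
    model->prompt("Why is the sky blue?", tmpl, onPrompt, onResponse, onRecalc, ctx);

    std::cout << std::endl;
    delete model;
    return 0;
}

The same placeholder convention flows through llmodel_c.h and the Python binding: %1 is replaced by the user prompt and an optional %2 marks where the assistant reply is placed, which is how the decodePrompt()/generateResponse() split and the fakeReply path below reconstruct chat history.
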
diff --git a/gpt4all-backend/llmodel_shared.cpp b/gpt4all-backend/llmodel_shared.cpp index 13c3706c0871..665da9c9fbd1 100644 --- a/gpt4all-backend/llmodel_shared.cpp +++ b/gpt4all-backend/llmodel_shared.cpp @@ -2,11 +2,20 @@ #include #include +#include #include +// TODO(cebtenzzre): replace this with llama_kv_cache_seq_shift for llamamodel (GPT-J needs this as-is) void LLModel::recalculateContext(PromptContext &promptCtx, std::function recalculate) { - size_t i = 0; - promptCtx.n_past = 0; + int n_keep = shouldAddBOS(); + const int32_t n_discard = (promptCtx.n_ctx - n_keep) * promptCtx.contextErase; + + // Erase the first percentage of context from the tokens + std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n"; + promptCtx.tokens.erase(promptCtx.tokens.begin() + n_keep, promptCtx.tokens.begin() + n_keep + n_discard); + + size_t i = n_keep; + promptCtx.n_past = n_keep; while (i < promptCtx.tokens.size()) { size_t batch_end = std::min(i + promptCtx.n_batch, promptCtx.tokens.size()); std::vector batch(promptCtx.tokens.begin() + i, promptCtx.tokens.begin() + batch_end); @@ -26,11 +35,36 @@ void LLModel::recalculateContext(PromptContext &promptCtx, std::function &placeholders, std::string &err) { + static const std::regex placeholderRegex(R"(%[1-2](?![0-9]))"); + + auto it = std::sregex_iterator(tmpl.begin(), tmpl.end(), placeholderRegex); + placeholders.clear(); + placeholders.insert(placeholders.end(), it, std::sregex_iterator()); + + if (placeholders.size() > 2) { + err = "ERROR: expected at most two placeholders, got " + std::to_string(placeholders.size()); + return false; + } + if (placeholders.size() >= 1 && placeholders[0].str() != "%1") { + err = "ERROR: first placeholder must be %1, got " + placeholders[0].str(); + return false; + } + if (placeholders.size() >= 2 && placeholders[1].str() != "%2") { + err = "ERROR: second placeholder must be %2, got " + placeholders[1].str(); + return false; + } + return true; +} + void LLModel::prompt(const std::string &prompt, + const std::string &promptTemplate, std::function promptCallback, std::function responseCallback, std::function recalculateCallback, - PromptContext &promptCtx) + PromptContext &promptCtx, + bool special, + std::string *fakeReply) { if (!isModelLoaded()) { std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n"; @@ -38,15 +72,86 @@ void LLModel::prompt(const std::string &prompt, } if (!supportsCompletion()) { - std::string errorMessage = "ERROR: this model does not support text completion or chat!\n"; + std::string errorMessage = "ERROR: this model does not support text completion or chat!"; responseCallback(-1, errorMessage); - std::cerr << implementation().modelType() << errorMessage; + std::cerr << implementation().modelType() << " " << errorMessage << "\n"; return; } - // tokenize the prompt - std::vector embd_inp = tokenize(promptCtx, prompt); + // parse the prompt template + std::vector placeholders; + { + std::string err; + if (!parsePromptTemplate(promptTemplate, placeholders, err)) { + responseCallback(-1, err); + std::cerr << err << "\n"; + return; + } + } + + auto old_n_past = promptCtx.n_past; // prepare to fake n_past for tokenize + // tokenize the user prompt + std::vector embd_inp; + if (placeholders.empty()) { + // this is unusual, but well-defined + std::cerr << __func__ << ": prompt template has no placeholder\n"; + embd_inp = tokenize(promptCtx, promptTemplate, true); + } else { + // template: beginning of user 
prompt + const auto &phUser = placeholders[0]; + std::string userPrefix(phUser.prefix()); + if (!userPrefix.empty()) { + embd_inp = tokenize(promptCtx, userPrefix, true); + promptCtx.n_past += embd_inp.size(); + } + + // user input (shouldn't have special token processing) + auto tokens = tokenize(promptCtx, prompt, special); + embd_inp.insert(embd_inp.end(), tokens.begin(), tokens.end()); + promptCtx.n_past += tokens.size(); + + // template: end of user prompt + start of assistant prompt + size_t start = phUser.position() + phUser.length(); + size_t end = placeholders.size() >= 2 ? placeholders[1].position() : promptTemplate.length(); + auto userToAsst = promptTemplate.substr(start, end - start); + if (!userToAsst.empty()) { + tokens = tokenize(promptCtx, userToAsst, true); + embd_inp.insert(embd_inp.end(), tokens.begin(), tokens.end()); + promptCtx.n_past += tokens.size(); + } + } + + promptCtx.n_past = old_n_past; // restore n_past so decodePrompt can increment it + + // decode the user prompt + decodePrompt(promptCallback, responseCallback, recalculateCallback, promptCtx, embd_inp); + + // decode the assistant's reply, either generated or spoofed + if (fakeReply == nullptr) { + generateResponse(responseCallback, recalculateCallback, promptCtx); + } else { + embd_inp = tokenize(promptCtx, *fakeReply, false); + decodePrompt(promptCallback, responseCallback, recalculateCallback, promptCtx, embd_inp); + } + + // decode the rest of the prompt template + if (placeholders.size() >= 2) { + // template: end of assistant prompt + size_t start = placeholders[1].position() + placeholders[1].length(); + auto asstSuffix = promptTemplate.substr(start); + if (!asstSuffix.empty()) { + embd_inp = tokenize(promptCtx, asstSuffix, true); + decodePrompt(promptCallback, responseCallback, recalculateCallback, promptCtx, embd_inp); + } + } +} + +void LLModel::decodePrompt(std::function promptCallback, + std::function responseCallback, + std::function recalculateCallback, + PromptContext &promptCtx, + std::vector embd_inp) { // save the context size promptCtx.n_ctx = contextLength(); @@ -69,11 +174,6 @@ void LLModel::prompt(const std::string &prompt, // Check if the context has run out... if (promptCtx.n_past + int32_t(batch.size()) > promptCtx.n_ctx) { - const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase; - // Erase the first percentage of context from the tokens... - std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n"; - promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint); - promptCtx.n_past = promptCtx.tokens.size(); recalculateContext(promptCtx, recalculateCallback); assert(promptCtx.n_past + int32_t(batch.size()) <= promptCtx.n_ctx); } @@ -94,7 +194,11 @@ void LLModel::prompt(const std::string &prompt, } i = batch_end; } +} +void LLModel::generateResponse(std::function responseCallback, + std::function recalculateCallback, + PromptContext &promptCtx) { std::string cachedResponse; std::vector cachedTokens; std::unordered_set reversePrompts @@ -108,11 +212,6 @@ void LLModel::prompt(const std::string &prompt, // Check if the context has run out... if (promptCtx.n_past + 1 > promptCtx.n_ctx) { - const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase; - // Erase the first percentage of context from the tokens... 
- std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n"; - promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint); - promptCtx.n_past = promptCtx.tokens.size(); recalculateContext(promptCtx, recalculateCallback); assert(promptCtx.n_past + 1 <= promptCtx.n_ctx); } @@ -165,8 +264,9 @@ void LLModel::prompt(const std::string &prompt, } } -std::vector LLModel::embedding(const std::string &/*text*/) +std::vector LLModel::embedding(const std::string &text) { + (void)text; if (!supportsCompletion()) { std::string errorMessage = "ERROR: this model does not support generating embeddings!\n"; std::cerr << implementation().modelType() << errorMessage; diff --git a/gpt4all-bindings/python/docs/gpt4all_python.md b/gpt4all-bindings/python/docs/gpt4all_python.md index dd4f6d7f35d1..7e56fabeec5a 100644 --- a/gpt4all-bindings/python/docs/gpt4all_python.md +++ b/gpt4all-bindings/python/docs/gpt4all_python.md @@ -246,90 +246,6 @@ To do the same outside a session, the input has to be formatted manually. For ex The colors in my previous response are blue, green and red. ``` -Ultimately, the method `GPT4All._format_chat_prompt_template()` is responsible for formatting templates. It can be -customized in a subclass. As an example: - -=== "Custom Subclass" - ``` py - from itertools import cycle - from gpt4all import GPT4All - - class RotatingTemplateGPT4All(GPT4All): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._templates = [ - "Respond like a pirate.", - "Respond like a politician.", - "Respond like a philosopher.", - "Respond like a Klingon.", - ] - self._cycling_templates = cycle(self._templates) - - def _format_chat_prompt_template( - self, - messages: list, - default_prompt_header: str = "", - default_prompt_footer: str = "", - ) -> str: - full_prompt = default_prompt_header + "\n\n" if default_prompt_header != "" else "" - for message in messages: - if message["role"] == "user": - user_message = f"USER: {message['content']} {next(self._cycling_templates)}\n" - full_prompt += user_message - if message["role"] == "assistant": - assistant_message = f"ASSISTANT: {message['content']}\n" - full_prompt += assistant_message - full_prompt += "\n\n" + default_prompt_footer if default_prompt_footer != "" else "" - print(full_prompt) - return full_prompt - ``` -=== "GPT4All Custom Subclass Example" - ``` py - model = RotatingTemplateGPT4All('wizardlm-13b-v1.2.Q4_0.gguf') - with model.chat_session(): # starting a session is optional in this example - response1 = model.generate("hi, who are you?") - print(response1) - print() - response2 = model.generate("what can you tell me about snakes?") - print(response2) - print() - response3 = model.generate("what's your opinion on Chess?") - print(response3) - print() - response4 = model.generate("tell me about ancient Rome.") - print(response4) - ``` -=== "Possible Output" - ``` - USER: hi, who are you? Respond like a pirate. - - Pirate: Ahoy there mateys! I be Cap'n Jack Sparrow of the Black Pearl. - - USER: what can you tell me about snakes? Respond like a politician. - - Politician: Snakes have been making headlines lately due to their ability to - slither into tight spaces and evade capture, much like myself during my last - election campaign. However, I believe that with proper education and - understanding of these creatures, we can work together towards creating a - safer environment for both humans and snakes alike. - - USER: what's your opinion on Chess? 
Respond like a philosopher. - - Philosopher: The game of chess is often used as an analogy to illustrate the - complexities of life and decision-making processes. However, I believe that it - can also be seen as a reflection of our own consciousness and subconscious mind. - Just as each piece on the board has its unique role to play in shaping the - outcome of the game, we too have different roles to fulfill in creating our own - personal narrative. - - USER: tell me about ancient Rome. Respond like a Klingon. - - Klingon: Ancient Rome was once a great empire that ruled over much of Europe and - the Mediterranean region. However, just as the Empire fell due to internal strife - and external threats, so too did my own house come crashing down when I failed to - protect our homeworld from invading forces. - ``` - ### Introspection A less apparent feature is the capacity to log the final prompt that gets sent to the model. It relies on diff --git a/gpt4all-bindings/python/gpt4all/_pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py index eb03a91443fe..9aaa94c10208 100644 --- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py +++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py @@ -89,10 +89,12 @@ class LLModelGPUDevice(ctypes.Structure): llmodel.llmodel_prompt.argtypes = [ ctypes.c_void_p, ctypes.c_char_p, + ctypes.c_char_p, PromptCallback, ResponseCallback, RecalculateCallback, ctypes.POINTER(LLModelPromptContext), + ctypes.c_bool, ] llmodel.llmodel_prompt.restype = None @@ -290,6 +292,7 @@ def generate_embedding(self, text: str) -> List[float]: def prompt_model( self, prompt: str, + prompt_template: str, callback: ResponseCallbackType, n_predict: int = 4096, top_k: int = 40, @@ -300,6 +303,7 @@ def prompt_model( repeat_last_n: int = 10, context_erase: float = 0.75, reset_context: bool = False, + special: bool = False, ): """ Generate response from model from a prompt. 
@@ -326,9 +330,6 @@ def prompt_model( prompt, ) - prompt_bytes = prompt.encode() - prompt_ptr = ctypes.c_char_p(prompt_bytes) - self._set_context( n_predict=n_predict, top_k=top_k, @@ -343,16 +344,18 @@ def prompt_model( llmodel.llmodel_prompt( self.model, - prompt_ptr, + ctypes.c_char_p(prompt.encode()), + ctypes.c_char_p(prompt_template.encode()), PromptCallback(self._prompt_callback), ResponseCallback(self._callback_decoder(callback)), RecalculateCallback(self._recalculate_callback), self.context, + special, ) def prompt_model_streaming( - self, prompt: str, callback: ResponseCallbackType = empty_response_callback, **kwargs + self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs ) -> Iterable[str]: output_queue: Queue[str | Sentinel] = Queue() @@ -369,15 +372,15 @@ def _generator_callback(token_id: int, response: str): return _generator_callback - def run_llmodel_prompt(prompt: str, callback: ResponseCallbackType, **kwargs): - self.prompt_model(prompt, callback, **kwargs) + def run_llmodel_prompt(prompt: str, prompt_template: str, callback: ResponseCallbackType, **kwargs): + self.prompt_model(prompt, prompt_template, callback, **kwargs) output_queue.put(Sentinel.TERMINATING_SYMBOL) # Kick off llmodel_prompt in separate thread so we can return generator # immediately thread = threading.Thread( target=run_llmodel_prompt, - args=(prompt, _generator_callback_wrapper(callback)), + args=(prompt, prompt_template, _generator_callback_wrapper(callback)), kwargs=kwargs, ) thread.start() diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py index 02fa1c806bb9..82342b28babf 100644 --- a/gpt4all-bindings/python/gpt4all/gpt4all.py +++ b/gpt4all-bindings/python/gpt4all/gpt4all.py @@ -4,8 +4,10 @@ from __future__ import annotations import os +import re import sys import time +import warnings from contextlib import contextmanager from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Union @@ -314,6 +316,10 @@ def generate( Either the entire completion or a generator that yields the completion token by token. """ + if re.search(r"%1(?![0-9])", self._current_prompt_template): + raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt " + "placeholder, please use '{0}' instead.") + # Preparing the model request generate_kwargs: Dict[str, Any] = dict( temp=temp, @@ -327,16 +333,29 @@ def generate( if self._is_chat_session_activated: # check if there is only one message, i.e. system prompt: - generate_kwargs["reset_context"] = len(self.current_chat_session) == 1 + reset = len(self.current_chat_session) == 1 + generate_kwargs["reset_context"] = reset self.current_chat_session.append({"role": "user", "content": prompt}) - prompt = self._format_chat_prompt_template( - messages=self.current_chat_session[-1:], - default_prompt_header=self.current_chat_session[0]["content"] - if generate_kwargs["reset_context"] - else "", - ) + if self._format_chat_prompt_template.__func__ is GPT4All._format_chat_prompt_template: + if reset: + # ingest system prompt + self.model.prompt_model(self.current_chat_session[0]["content"], "%1", + n_batch=n_batch, n_predict=0, special=True) + prompt_template = self._current_prompt_template.format("%1") + else: + warnings.warn( + "_format_chat_prompt_template is deprecated. 
Please use a chat session with a prompt template.", + DeprecationWarning, + ) + # special tokens won't be processed + prompt = self._format_chat_prompt_template( + self.current_chat_session[-1:], + self.current_chat_session[0]["content"] if reset else "", + ) + prompt_template = "%1" else: + prompt_template = "%1" generate_kwargs["reset_context"] = True # Prepare the callback, process the model response @@ -365,14 +384,16 @@ def _callback(token_id: int, response: str) -> bool: # Send the request to the model if streaming: return self.model.prompt_model_streaming( - prompt=prompt, - callback=_callback_wrapper(callback, output_collector), + prompt, + prompt_template, + _callback_wrapper(callback, output_collector), **generate_kwargs, ) self.model.prompt_model( - prompt=prompt, - callback=_callback_wrapper(callback, output_collector), + prompt, + prompt_template, + _callback_wrapper(callback, output_collector), **generate_kwargs, ) @@ -423,24 +444,6 @@ def _format_chat_prompt_template( Formatted prompt. """ - if isinstance(default_prompt_header, bool): - import warnings - - warnings.warn( - "Using True/False for the 'default_prompt_header' is deprecated. Use a string instead.", - DeprecationWarning, - ) - default_prompt_header = "" - - if isinstance(default_prompt_footer, bool): - import warnings - - warnings.warn( - "Using True/False for the 'default_prompt_footer' is deprecated. Use a string instead.", - DeprecationWarning, - ) - default_prompt_footer = "" - full_prompt = default_prompt_header + "\n\n" if default_prompt_header != "" else "" for message in messages: diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py index c76f1b49254b..e13bca6c07a2 100644 --- a/gpt4all-bindings/python/setup.py +++ b/gpt4all-bindings/python/setup.py @@ -68,7 +68,7 @@ def get_long_description(): setup( name=package_name, - version="2.2.1.post1", + version="2.3.0", description="Python bindings for GPT4All", long_description=get_long_description(), long_description_content_type="text/markdown", diff --git a/gpt4all-chat/chatgpt.cpp b/gpt4all-chat/chatgpt.cpp index 5f3da91d180a..0575ee8ee22b 100644 --- a/gpt4all-chat/chatgpt.cpp +++ b/gpt4all-chat/chatgpt.cpp @@ -75,13 +75,18 @@ size_t ChatGPT::restoreState(const uint8_t *src) } void ChatGPT::prompt(const std::string &prompt, + const std::string &promptTemplate, std::function promptCallback, std::function responseCallback, std::function recalculateCallback, - PromptContext &promptCtx) { + PromptContext &promptCtx, + bool special, + std::string *fakeReply) { Q_UNUSED(promptCallback); Q_UNUSED(recalculateCallback); + Q_UNUSED(special); + Q_UNUSED(fakeReply); // FIXME(cebtenzzre): I broke ChatGPT if (!isModelLoaded()) { std::cerr << "ChatGPT ERROR: prompt won't work with an unloaded model!\n"; @@ -109,7 +114,7 @@ void ChatGPT::prompt(const std::string &prompt, QJsonObject promptObject; promptObject.insert("role", "user"); - promptObject.insert("content", QString::fromStdString(prompt)); + promptObject.insert("content", QString::fromStdString(promptTemplate).arg(QString::fromStdString(prompt))); messages.append(promptObject); root.insert("messages", messages); diff --git a/gpt4all-chat/chatgpt.h b/gpt4all-chat/chatgpt.h index 11d84606bccf..2656c6f763c4 100644 --- a/gpt4all-chat/chatgpt.h +++ b/gpt4all-chat/chatgpt.h @@ -1,6 +1,8 @@ #ifndef CHATGPT_H #define CHATGPT_H +#include + #include #include #include @@ -55,10 +57,13 @@ class ChatGPT : public QObject, public LLModel { size_t saveState(uint8_t *dest) const override; size_t 
restoreState(const uint8_t *src) override; void prompt(const std::string &prompt, + const std::string &promptTemplate, std::function promptCallback, std::function responseCallback, std::function recalculateCallback, - PromptContext &ctx) override; + PromptContext &ctx, + bool special, + std::string *fakeReply) override; void setThreadCount(int32_t n_threads) override; int32_t threadCount() const override; @@ -69,7 +74,7 @@ class ChatGPT : public QObject, public LLModel { QList context() const { return m_context; } void setContext(const QList &context) { m_context = context; } - bool callResponse(int32_t token, const std::string& string); + bool callResponse(int32_t token, const std::string &string); Q_SIGNALS: void request(const QString &apiKey, @@ -80,12 +85,41 @@ class ChatGPT : public QObject, public LLModel { // We have to implement these as they are pure virtual in base class, but we don't actually use // them as they are only called from the default implementation of 'prompt' which we override and // completely replace - std::vector tokenize(PromptContext &, const std::string&) const override { return std::vector(); } - std::string tokenToString(Token) const override { return std::string(); } - Token sampleToken(PromptContext &ctx) const override { return -1; } - bool evalTokens(PromptContext &/*ctx*/, const std::vector& /*tokens*/) const override { return false; } - int32_t contextLength() const override { return -1; } - const std::vector& endTokens() const override { static const std::vector fres; return fres; } + + std::vector tokenize(PromptContext &ctx, const std::string &str, bool special) const override { + (void)ctx; + (void)str; + (void)special; + throw std::logic_error("not implemented"); + } + + std::string tokenToString(Token id) const override { + (void)id; + throw std::logic_error("not implemented"); + } + + Token sampleToken(PromptContext &ctx) const override { + (void)ctx; + throw std::logic_error("not implemented"); + } + + bool evalTokens(PromptContext &ctx, const std::vector &tokens) const override { + (void)ctx; + (void)tokens; + throw std::logic_error("not implemented"); + } + + int32_t contextLength() const override { + throw std::logic_error("not implemented"); + } + + const std::vector &endTokens() const override { + throw std::logic_error("not implemented"); + } + + bool shouldAddBOS() const override { + throw std::logic_error("not implemented"); + } private: std::function m_responseCallback; diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index 750e85485b41..aa19b69601b1 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -303,6 +303,9 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), buildVariant, n_ctx); if (m_llModelInfo.model) { + if (m_llModelInfo.model->isModelBlacklisted(filePath.toStdString())) { + // TODO(cebtenzzre): warn that this model is out-of-date + } m_llModelInfo.model->setProgressCallback([this](float progress) -> bool { emit modelLoadingPercentageChanged(progress); @@ -588,14 +591,11 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString } // Augment the prompt template with the results if any - QList augmentedTemplate; + QList docsContext; if (!databaseResults.isEmpty()) - augmentedTemplate.append("### Context:"); + docsContext.append("### Context:"); for (const ResultInfo &info : databaseResults) - augmentedTemplate.append(info.text); - augmentedTemplate.append(promptTemplate); - - QString 
instructPrompt = augmentedTemplate.join("\n").arg(prompt); + docsContext.append(info.text); int n_threads = MySettings::globalInstance()->threadCount(); @@ -605,7 +605,6 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString std::placeholders::_2); auto recalcFunc = std::bind(&ChatLLM::handleRecalculate, this, std::placeholders::_1); emit promptProcessing(); - qint32 logitsBefore = m_ctx.logits.size(); m_ctx.n_predict = n_predict; m_ctx.top_k = top_k; m_ctx.top_p = top_p; @@ -615,11 +614,16 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString m_ctx.repeat_last_n = repeat_penalty_tokens; m_llModelInfo.model->setThreadCount(n_threads); #if defined(DEBUG) - printf("%s", qPrintable(instructPrompt)); + printf("%s", qPrintable(prompt)); fflush(stdout); #endif m_timer->start(); - m_llModelInfo.model->prompt(instructPrompt.toStdString(), promptFunc, responseFunc, recalcFunc, m_ctx); + if (!docsContext.isEmpty()) { + auto old_n_predict = std::exchange(m_ctx.n_predict, 0); // decode localdocs context without a response + m_llModelInfo.model->prompt(docsContext.join("\n").toStdString(), "%1", promptFunc, responseFunc, recalcFunc, m_ctx); + m_ctx.n_predict = old_n_predict; // now we are ready for a response + } + m_llModelInfo.model->prompt(prompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc, recalcFunc, m_ctx); #if defined(DEBUG) printf("\n"); fflush(stdout); @@ -720,7 +724,7 @@ void ChatLLM::generateName() printf("%s", qPrintable(instructPrompt)); fflush(stdout); #endif - m_llModelInfo.model->prompt(instructPrompt.toStdString(), promptFunc, responseFunc, recalcFunc, ctx); + m_llModelInfo.model->prompt(instructPrompt.toStdString(), "%1", promptFunc, responseFunc, recalcFunc, ctx); #if defined(DEBUG) printf("\n"); fflush(stdout); @@ -780,16 +784,6 @@ bool ChatLLM::handleSystemPrompt(int32_t token) return !m_stopGenerating; } -bool ChatLLM::handleSystemResponse(int32_t token, const std::string &response) -{ -#if defined(DEBUG) - qDebug() << "system response" << m_llmThread.objectName() << token << response << m_stopGenerating; -#endif - Q_UNUSED(token); - Q_UNUSED(response); - return false; -} - bool ChatLLM::handleSystemRecalculate(bool isRecalc) { #if defined(DEBUG) @@ -808,16 +802,6 @@ bool ChatLLM::handleRestoreStateFromTextPrompt(int32_t token) return !m_stopGenerating; } -bool ChatLLM::handleRestoreStateFromTextResponse(int32_t token, const std::string &response) -{ -#if defined(DEBUG) - qDebug() << "restore state from text response" << m_llmThread.objectName() << token << response << m_stopGenerating; -#endif - Q_UNUSED(token); - Q_UNUSED(response); - return false; -} - bool ChatLLM::handleRestoreStateFromTextRecalculate(bool isRecalc) { #if defined(DEBUG) @@ -1027,8 +1011,6 @@ void ChatLLM::processSystemPrompt() m_ctx = LLModel::PromptContext(); auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1); - auto responseFunc = std::bind(&ChatLLM::handleSystemResponse, this, std::placeholders::_1, - std::placeholders::_2); auto recalcFunc = std::bind(&ChatLLM::handleSystemRecalculate, this, std::placeholders::_1); const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo); @@ -1051,7 +1033,9 @@ void ChatLLM::processSystemPrompt() printf("%s", qPrintable(QString::fromStdString(systemPrompt))); fflush(stdout); #endif - m_llModelInfo.model->prompt(systemPrompt, promptFunc, responseFunc, recalcFunc, m_ctx); + auto old_n_predict = std::exchange(m_ctx.n_predict, 0); // decode system 
prompt without a response + m_llModelInfo.model->prompt(systemPrompt, "%1", promptFunc, nullptr, recalcFunc, m_ctx, true); + m_ctx.n_predict = old_n_predict; #if defined(DEBUG) printf("\n"); fflush(stdout); @@ -1073,8 +1057,6 @@ void ChatLLM::processRestoreStateFromText() m_ctx = LLModel::PromptContext(); auto promptFunc = std::bind(&ChatLLM::handleRestoreStateFromTextPrompt, this, std::placeholders::_1); - auto responseFunc = std::bind(&ChatLLM::handleRestoreStateFromTextResponse, this, std::placeholders::_1, - std::placeholders::_2); auto recalcFunc = std::bind(&ChatLLM::handleRestoreStateFromTextRecalculate, this, std::placeholders::_1); const QString promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo); @@ -1094,9 +1076,19 @@ void ChatLLM::processRestoreStateFromText() m_ctx.repeat_penalty = repeat_penalty; m_ctx.repeat_last_n = repeat_penalty_tokens; m_llModelInfo.model->setThreadCount(n_threads); - for (auto pair : m_stateFromText) { - const QString str = pair.first == "Prompt: " ? promptTemplate.arg(pair.second) : pair.second; - m_llModelInfo.model->prompt(str.toStdString(), promptFunc, responseFunc, recalcFunc, m_ctx); + + auto it = m_stateFromText.begin(); + while (it < m_stateFromText.end()) { + auto &prompt = *it++; + Q_ASSERT(prompt.first == "Prompt: "); + Q_ASSERT(it < m_stateFromText.end()); + + auto &response = *it++; + Q_ASSERT(response.first != "Prompt: "); + auto responseText = response.second.toStdString(); + + m_llModelInfo.model->prompt(prompt.second.toStdString(), promptTemplate.toStdString(), promptFunc, nullptr, + recalcFunc, m_ctx, false, &responseText); } if (!m_stopGenerating) { diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 5096cd032228..903e7ad6bd2a 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -17,10 +17,10 @@ }, { "order": "b", - "md5sum": "48de9538c774188eb25a7e9ee024bbd3", + "md5sum": "f692417a22405d80573ac10cb0cd6c6a", "name": "Mistral OpenOrca", - "filename": "mistral-7b-openorca.Q4_0.gguf", - "filesize": "4108927744", + "filename": "mistral-7b-openorca.Q4_0.gguf2.gguf", + "filesize": "4108928128", "requires": "2.5.0", "ramrequired": "8", "parameters": "7 billion", @@ -28,7 +28,7 @@ "type": "Mistral", "description": "Best overall fast chat model
      • Fast responses
      • Chat based model
      • Trained by Mistral AI
      • Finetuned on OpenOrca dataset curated via Nomic Atlas
      • Licensed for commercial use
      ", "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf", - "promptTemplate": "<|im_start|>user\n%1<|im_end|><|im_start|>assistant\n", + "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n", "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" }, { @@ -152,7 +152,7 @@ "type": "MPT", "description": "Good model with novel architecture
      • Fast responses
      • Chat based
      • Trained by Mosaic ML
      • Cannot be used commercially
      ", "url": "https://gpt4all.io/models/gguf/mpt-7b-chat-newbpe-q4_0.gguf", - "promptTemplate": "<|im_start|>user\n%1<|im_end|><|im_start|>assistant\n", + "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n", "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>" }, { diff --git a/gpt4all-chat/modellist.cpp b/gpt4all-chat/modellist.cpp index 7d07e4c5fc88..10881e880589 100644 --- a/gpt4all-chat/modellist.cpp +++ b/gpt4all-chat/modellist.cpp @@ -951,7 +951,7 @@ void ModelList::updateModelsFromDirectory() processDirectory(localPath); } -#define MODELS_VERSION 2 +#define MODELS_VERSION 3 void ModelList::updateModelsFromJson() { From 67bbce43abf55c25c937a2339fa2de72acc23058 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 21 Feb 2024 16:05:49 -0500 Subject: [PATCH 15/17] Fix state issues with reloading model. Signed-off-by: Adam Treat --- gpt4all-chat/chatllm.cpp | 6 +++++- gpt4all-chat/main.qml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index aa19b69601b1..d0c9d33b1f6a 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -672,7 +672,11 @@ void ChatLLM::unloadModel() if (!isModelLoaded() || m_isServer) return; - emit modelLoadingPercentageChanged(0.0f); + if (!m_forceUnloadModel || !m_shouldBeLoaded) + emit modelLoadingPercentageChanged(0.0f); + else + emit modelLoadingPercentageChanged(std::numeric_limits::min()); // small non-zero positive value + saveState(); #if defined(DEBUG_MODEL_LOADING) qDebug() << "unloadModel" << m_llmThread.objectName() << m_llModelInfo.model; diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index 7bacb6cb2b79..70fe6dae9170 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -463,7 +463,7 @@ Window { MyMiniButton { id: ejectButton - visible: currentChat.isModelLoaded + visible: currentChat.isModelLoaded && !window.isCurrentlyLoading z: 500 anchors.right: parent.right anchors.rightMargin: 50 From ef0a67eb940a57f88172817b186a439809360e46 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Wed, 21 Feb 2024 16:18:26 -0500 Subject: [PATCH 16/17] models: remove gemma from models2.json and models3.json (#1995) Signed-off-by: Jared Van Bortel --- gpt4all-chat/metadata/models2.json | 48 ++++++++++-------------------- gpt4all-chat/metadata/models3.json | 48 ++++++++++-------------------- 2 files changed, 32 insertions(+), 64 deletions(-) diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 903e7ad6bd2a..4d6c6a372819 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -1,22 +1,6 @@ [ { "order": "a", - "md5sum": "6d1ca6e9533d177361fe2612a2c87474", - "name": "Gemma Instruct", - "filename": "gemma-7b-it.Q4_0.gguf", - "filesize": "4809316512", - "requires": "2.7.1", - "ramrequired": "8", - "parameters": "7 billion", - "quant": "q4_0", - "type": "Gemma", - "description": "A state-of-the-art open model from Google
      • Fast responses
      • Chat based model
      • Trained by Google
      • Licensed for commercial use
      • Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
      ", - "url": "https://gpt4all.io/models/gguf/gemma-7b-it.Q4_0.gguf", - "promptTemplate": "user\n%1\nmodel\n", - "systemPrompt": "" - }, - { - "order": "b", "md5sum": "f692417a22405d80573ac10cb0cd6c6a", "name": "Mistral OpenOrca", "filename": "mistral-7b-openorca.Q4_0.gguf2.gguf", @@ -31,6 +15,22 @@ "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n", "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" }, + { + "order": "b", + "md5sum": "97463be739b50525df56d33b26b00852", + "name": "Mistral Instruct", + "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", + "filesize": "4108916384", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Mistral", + "systemPrompt": " ", + "description": "Best overall fast instruction following model
      • Fast responses
      • Trained by Mistral AI
      • Uncensored
      • Licensed for commercial use
      ", + "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", + "promptTemplate": "[INST] %1 [/INST]" + }, { "order": "c", "md5sum": "c4c78adf744d6a20f05c8751e3961b84", @@ -47,22 +47,6 @@ "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf", "promptTemplate": "### Instruction:\n%1\n### Response:\n" }, - { - "order": "d", - "md5sum": "97463be739b50525df56d33b26b00852", - "name": "Mistral Instruct", - "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", - "filesize": "4108916384", - "requires": "2.5.0", - "ramrequired": "8", - "parameters": "7 billion", - "quant": "q4_0", - "type": "Mistral", - "systemPrompt": " ", - "description": "Best overall fast instruction following model
      • Fast responses
      • Trained by Mistral AI
      • Uncensored
      • Licensed for commercial use
      ", - "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", - "promptTemplate": "[INST] %1 [/INST]" - }, { "order": "e", "md5sum": "00c8593ba57f5240f59662367b3ed4a5", diff --git a/gpt4all-chat/metadata/models3.json b/gpt4all-chat/metadata/models3.json index 5e33ca0f88b0..df6c12eb0468 100644 --- a/gpt4all-chat/metadata/models3.json +++ b/gpt4all-chat/metadata/models3.json @@ -1,22 +1,6 @@ [ { "order": "a", - "md5sum": "6d1ca6e9533d177361fe2612a2c87474", - "name": "Gemma Instruct", - "filename": "gemma-7b-it.Q4_0.gguf", - "filesize": "4809316512", - "requires": "2.7.1", - "ramrequired": "8", - "parameters": "7 billion", - "quant": "q4_0", - "type": "Gemma", - "description": "A state-of-the-art open model from Google
      • Fast responses
      • Chat based model
      • Trained by Google
      • Licensed for commercial use
      • Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
      ", - "url": "https://gpt4all.io/models/gguf/gemma-7b-it.Q4_0.gguf", - "promptTemplate": "user\n%1\nmodel\n%2\n", - "systemPrompt": "" - }, - { - "order": "b", "md5sum": "f692417a22405d80573ac10cb0cd6c6a", "name": "Mistral OpenOrca", "filename": "mistral-7b-openorca.Q4_0.gguf2.gguf", @@ -31,6 +15,22 @@ "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n", "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" }, + { + "order": "b", + "md5sum": "97463be739b50525df56d33b26b00852", + "name": "Mistral Instruct", + "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", + "filesize": "4108916384", + "requires": "2.5.0", + "ramrequired": "8", + "parameters": "7 billion", + "quant": "q4_0", + "type": "Mistral", + "systemPrompt": " ", + "description": "Best overall fast instruction following model
      • Fast responses
      • Trained by Mistral AI
      • Uncensored
      • Licensed for commercial use
      ", + "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", + "promptTemplate": "[INST] %1 [/INST]" + }, { "order": "c", "md5sum": "c4c78adf744d6a20f05c8751e3961b84", @@ -47,22 +47,6 @@ "url": "https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf", "promptTemplate": "### Instruction:\n%1\n### Response:\n" }, - { - "order": "d", - "md5sum": "97463be739b50525df56d33b26b00852", - "name": "Mistral Instruct", - "filename": "mistral-7b-instruct-v0.1.Q4_0.gguf", - "filesize": "4108916384", - "requires": "2.5.0", - "ramrequired": "8", - "parameters": "7 billion", - "quant": "q4_0", - "type": "Mistral", - "systemPrompt": " ", - "description": "Best overall fast instruction following model
      • Fast responses
      • Trained by Mistral AI
      • Uncensored
      • Licensed for commercial use
      ", - "url": "https://gpt4all.io/models/gguf/mistral-7b-instruct-v0.1.Q4_0.gguf", - "promptTemplate": "[INST] %1 [/INST]" - }, { "order": "e", "md5sum": "00c8593ba57f5240f59662367b3ed4a5", From a010a8a7ca3020e14a5e6c08a3426e0d987eef75 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 21 Feb 2024 16:53:47 -0500 Subject: [PATCH 17/17] Bump version and release notes for v2.7.1 Signed-off-by: Adam Treat --- gpt4all-chat/CMakeLists.txt | 2 +- gpt4all-chat/metadata/release.json | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 0f9d0ab0f2e9..076e3c0b6fd2 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -18,7 +18,7 @@ endif() set(APP_VERSION_MAJOR 2) set(APP_VERSION_MINOR 7) -set(APP_VERSION_PATCH 1) +set(APP_VERSION_PATCH 2) set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}") # Include the binary directory for the generated header file diff --git a/gpt4all-chat/metadata/release.json b/gpt4all-chat/metadata/release.json index bd5b9b6836db..1ca17c3afbf9 100644 --- a/gpt4all-chat/metadata/release.json +++ b/gpt4all-chat/metadata/release.json @@ -683,6 +683,28 @@ * Jared Van Bortel (Nomic AI) * Adam Treat (Nomic AI) * Community (beta testers, bug reporters, bindings authors) +" + }, + { + "version": "2.7.1", + "notes": +" +* Update to latest llama.cpp with support for Google Gemma +* Gemma, Phi and Phi-2, Qwen2, and StableLM are now all GPU accelerated +* Large revamp of the model loading to support explicit unload/reload +* Bugfixes for ChatML and improved version of Mistral OpenOrca +* We no longer load a model by default on application start +* We no longer load a model by default on chat context switch +* Fixes for visual artifacts in update reminder dialog +* Blacklist Intel GPU's for now as we don't support yet +* Fixes for binary save/restore of chat +* Save and restore of window geometry across application starts +", + "contributors": +" +* Jared Van Bortel (Nomic AI) +* Adam Treat (Nomic AI) +* Community (beta testers, bug reporters, bindings authors) " } ]