From 2da73658de44679ee0424dc084167310c22b9e3f Mon Sep 17 00:00:00 2001 From: Kevin On <40454531+kevin-on@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:00:18 -0500 Subject: [PATCH] feat: add Gemini text-embedding-004 model (#122) - Add Gemini text-embedding-004 as a new embedding model option - Add rate limit error handling for embedding API calls - Update model names to include provider prefix for clarity --- README.md | 1 + ..._vector_data_gemini_text_embedding_004.sql | 10 + drizzle/meta/0007_snapshot.json | 434 ++++++++++++++++++ drizzle/meta/_journal.json | 7 + src/constants.ts | 19 +- src/core/llm/exception.ts | 7 + src/core/rag/embedding.ts | 87 +++- src/core/rag/ragEngine.ts | 2 + src/database/migrations.json | 9 + src/database/modules/vector/VectorManager.ts | 12 +- src/database/schema.ts | 1 + src/types/embedding.ts | 1 + 12 files changed, 559 insertions(+), 31 deletions(-) create mode 100644 drizzle/0007_create_vector_data_gemini_text_embedding_004.sql create mode 100644 drizzle/meta/0007_snapshot.json diff --git a/README.md b/README.md index 587f21b..07eb4aa 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ Note: The Apply Edit feature is currently slower than desired. We are working on 5. Set up your API key in plugin settings - OpenAI : [ChatGPT API Keys](https://platform.openai.com/api-keys) - Anthropic : [Claude API Keys](https://console.anthropic.com/settings/keys) + - Gemini : [Gemini API Keys](https://aistudio.google.com/apikey) - Groq : [Groq API Keys](https://console.groq.com/keys) **📚 For detailed setup instructions and documentation, please visit our [Documentation](https://github.com/glowingjade/obsidian-smart-composer/wiki).** diff --git a/drizzle/0007_create_vector_data_gemini_text_embedding_004.sql b/drizzle/0007_create_vector_data_gemini_text_embedding_004.sql new file mode 100644 index 0000000..35a207a --- /dev/null +++ b/drizzle/0007_create_vector_data_gemini_text_embedding_004.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS "vector_data_gemini_text_embedding_004" ( + "id" serial PRIMARY KEY NOT NULL, + "path" text NOT NULL, + "mtime" bigint NOT NULL, + "content" text NOT NULL, + "embedding" vector(768), + "metadata" jsonb NOT NULL +); +--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "embeddingIndex_gemini_text_embedding_004" ON "vector_data_gemini_text_embedding_004" USING hnsw ("embedding" vector_cosine_ops); \ No newline at end of file diff --git a/drizzle/meta/0007_snapshot.json b/drizzle/meta/0007_snapshot.json new file mode 100644 index 0000000..fbb447a --- /dev/null +++ b/drizzle/meta/0007_snapshot.json @@ -0,0 +1,434 @@ +{ + "id": "ab9cffab-e98f-49a2-be13-6fb5f3eda0ac", + "prevId": "f33fa3e2-3170-4187-a15d-d01ed651fe8b", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.template": { + "name": "template", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "template_name_unique": { + "name": "template_name_unique", + "nullsNotDistinct": false, + "columns": ["name"] + } + }, + "checkConstraints": {} + }, + "public.vector_data_openai_text_embedding_3_small": { + "name": "vector_data_openai_text_embedding_3_small", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mtime": { + "name": "mtime", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector(1536)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "embeddingIndex_openai_text_embedding_3_small": { + "name": "embeddingIndex_openai_text_embedding_3_small", + "columns": [ + { + "expression": "embedding", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "public.vector_data_openai_text_embedding_3_large": { + "name": "vector_data_openai_text_embedding_3_large", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mtime": { + "name": "mtime", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector(3072)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "public.vector_data_gemini_text_embedding_004": { + "name": "vector_data_gemini_text_embedding_004", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mtime": { + "name": "mtime", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector(768)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "embeddingIndex_gemini_text_embedding_004": { + "name": "embeddingIndex_gemini_text_embedding_004", + "columns": [ + { + "expression": "embedding", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "public.vector_data_ollama_nomic_embed_text": { + "name": "vector_data_ollama_nomic_embed_text", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mtime": { + "name": "mtime", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector(768)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "embeddingIndex_ollama_nomic_embed_text": { + "name": "embeddingIndex_ollama_nomic_embed_text", + "columns": [ + { + "expression": "embedding", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "public.vector_data_ollama_mxbai_embed_large": { + "name": "vector_data_ollama_mxbai_embed_large", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mtime": { + "name": "mtime", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector(1024)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "embeddingIndex_ollama_mxbai_embed_large": { + "name": "embeddingIndex_ollama_mxbai_embed_large", + "columns": [ + { + "expression": "embedding", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "public.vector_data_ollama_bge_m3": { + "name": "vector_data_ollama_bge_m3", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mtime": { + "name": "mtime", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector(1024)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "embeddingIndex_ollama_bge_m3": { + "name": "embeddingIndex_ollama_bge_m3", + "columns": [ + { + "expression": "embedding", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 66ed4bc..81d3aa6 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -50,6 +50,13 @@ "when": 1732064682864, "tag": "0006_rename_vector_tables", "breakpoints": true + }, + { + "idx": 7, + "version": "7", + "when": 1732189839252, + "tag": "0007_create_vector_data_gemini_text_embedding_004", + "breakpoints": true } ] } diff --git a/src/constants.ts b/src/constants.ts index 533e9be..7ab1496 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -132,7 +132,7 @@ export const APPLY_MODEL_OPTIONS: ModelOption[] = [ export const EMBEDDING_MODEL_OPTIONS: EmbeddingModelOption[] = [ { id: 'openai/text-embedding-3-small', - name: 'text-embedding-3-small (Recommended)', + name: 'openai/text-embedding-3-small (Recommended)', model: { provider: 'openai', model: 'text-embedding-3-small', @@ -141,7 +141,7 @@ export const EMBEDDING_MODEL_OPTIONS: EmbeddingModelOption[] = [ }, { id: 'openai/text-embedding-3-large', - name: 'text-embedding-3-large', + name: 'openai/text-embedding-3-large', model: { provider: 'openai', model: 'text-embedding-3-large', @@ -149,7 +149,16 @@ export const EMBEDDING_MODEL_OPTIONS: EmbeddingModelOption[] = [ dimension: 3072, }, { - name: 'nomic-embed-text (Ollama)', + id: 'gemini/text-embedding-004', + name: 'gemini/text-embedding-004', + model: { + provider: 'gemini', + model: 'text-embedding-004', + }, + dimension: 768, + }, + { + name: 'ollama/nomic-embed-text', id: 'ollama/nomic-embed-text', model: { provider: 'ollama', @@ -159,7 +168,7 @@ export const EMBEDDING_MODEL_OPTIONS: EmbeddingModelOption[] = [ dimension: 768, }, { - name: 'mxbai-embed-large (Ollama)', + name: 'ollama/mxbai-embed-large', id: 'ollama/mxbai-embed-large', model: { provider: 'ollama', @@ -169,7 +178,7 @@ export const EMBEDDING_MODEL_OPTIONS: EmbeddingModelOption[] = [ dimension: 1024, }, { - name: 'bge-m3 (Ollama)', + name: 'ollama/bge-m3', id: 'ollama/bge-m3', model: { provider: 'ollama', diff --git a/src/core/llm/exception.ts b/src/core/llm/exception.ts index 6ea8fd4..4224009 100644 --- a/src/core/llm/exception.ts +++ b/src/core/llm/exception.ts @@ -25,3 +25,10 @@ export class LLMModelNotSetException extends Error { this.name = 'LLMModelNotSetException' } } + +export class LLMRateLimitExceededException extends Error { + constructor(message: string) { + super(message) + this.name = 'LLMRateLimitExceededException' + } +} diff --git a/src/core/rag/embedding.ts b/src/core/rag/embedding.ts index a5c4cb4..8e670a7 100644 --- a/src/core/rag/embedding.ts +++ b/src/core/rag/embedding.ts @@ -1,9 +1,11 @@ +import { GoogleGenerativeAI } from '@google/generative-ai' import { OpenAI } from 'openai' import { EmbeddingModel } from '../../types/embedding' import { LLMAPIKeyNotSetException, LLMBaseUrlNotSetException, + LLMRateLimitExceededException, } from '../llm/exception' import { NoStainlessOpenAI } from '../llm/ollama' @@ -11,6 +13,7 @@ export const getEmbeddingModel = ( embeddingModelId: string, apiKeys: { openAIApiKey: string + geminiApiKey: string }, ollamaBaseUrl: string, ): EmbeddingModel => { @@ -24,16 +27,28 @@ export const getEmbeddingModel = ( id: 'openai/text-embedding-3-small', dimension: 1536, getEmbedding: async (text: string) => { - if (!openai.apiKey) { - throw new LLMAPIKeyNotSetException( - 'OpenAI API key is missing. Please set it in settings menu.', - ) + try { + if (!openai.apiKey) { + throw new LLMAPIKeyNotSetException( + 'OpenAI API key is missing. Please set it in settings menu.', + ) + } + const embedding = await openai.embeddings.create({ + model: 'text-embedding-3-small', + input: text, + }) + return embedding.data[0].embedding + } catch (error) { + if ( + error.status === 429 && + error.message.toLowerCase().includes('rate limit') + ) { + throw new LLMRateLimitExceededException( + 'OpenAI API rate limit exceeded. Please try again later.', + ) + } + throw error } - const embedding = await openai.embeddings.create({ - model: 'text-embedding-3-small', - input: text, - }) - return embedding.data[0].embedding }, } } @@ -46,16 +61,52 @@ export const getEmbeddingModel = ( id: 'openai/text-embedding-3-large', dimension: 3072, getEmbedding: async (text: string) => { - if (!openai.apiKey) { - throw new LLMAPIKeyNotSetException( - 'OpenAI API key is missing. Please set it in settings menu.', - ) + try { + if (!openai.apiKey) { + throw new LLMAPIKeyNotSetException( + 'OpenAI API key is missing. Please set it in settings menu.', + ) + } + const embedding = await openai.embeddings.create({ + model: 'text-embedding-3-large', + input: text, + }) + return embedding.data[0].embedding + } catch (error) { + if ( + error.status === 429 && + error.message.toLowerCase().includes('rate limit') + ) { + throw new LLMRateLimitExceededException( + 'OpenAI API rate limit exceeded. Please try again later.', + ) + } + throw error + } + }, + } + } + case 'gemini/text-embedding-004': { + const client = new GoogleGenerativeAI(apiKeys.geminiApiKey) + const model = client.getGenerativeModel({ model: 'text-embedding-004' }) + return { + id: 'gemini/text-embedding-004', + dimension: 768, + getEmbedding: async (text: string) => { + try { + const response = await model.embedContent(text) + return response.embedding.values + } catch (error) { + if ( + error.status === 429 && + error.message.includes('RATE_LIMIT_EXCEEDED') + ) { + throw new LLMRateLimitExceededException( + 'Gemini API rate limit exceeded. Please try again later.', + ) + } + throw error } - const embedding = await openai.embeddings.create({ - model: 'text-embedding-3-large', - input: text, - }) - return embedding.data[0].embedding }, } } diff --git a/src/core/rag/ragEngine.ts b/src/core/rag/ragEngine.ts index b439d99..572cab8 100644 --- a/src/core/rag/ragEngine.ts +++ b/src/core/rag/ragEngine.ts @@ -27,6 +27,7 @@ export class RAGEngine { settings.embeddingModelId, { openAIApiKey: settings.openAIApiKey, + geminiApiKey: settings.geminiApiKey, }, settings.ollamaEmbeddingModel.baseUrl, ) @@ -38,6 +39,7 @@ export class RAGEngine { settings.embeddingModelId, { openAIApiKey: settings.openAIApiKey, + geminiApiKey: settings.geminiApiKey, }, settings.ollamaEmbeddingModel.baseUrl, ) diff --git a/src/database/migrations.json b/src/database/migrations.json index 27ccbe0..5292939 100644 --- a/src/database/migrations.json +++ b/src/database/migrations.json @@ -68,5 +68,14 @@ "bps": true, "folderMillis": 1732064682864, "hash": "3dad96e7f1c939c5da3cae4792782e7a35488d39da418e6a9e6f60eafd637f55" + }, + { + "sql": [ + "CREATE TABLE IF NOT EXISTS \"vector_data_gemini_text_embedding_004\" (\n\t\"id\" serial PRIMARY KEY NOT NULL,\n\t\"path\" text NOT NULL,\n\t\"mtime\" bigint NOT NULL,\n\t\"content\" text NOT NULL,\n\t\"embedding\" vector(768),\n\t\"metadata\" jsonb NOT NULL\n);\n", + "\nCREATE INDEX IF NOT EXISTS \"embeddingIndex_gemini_text_embedding_004\" ON \"vector_data_gemini_text_embedding_004\" USING hnsw (\"embedding\" vector_cosine_ops);" + ], + "bps": true, + "folderMillis": 1732189839252, + "hash": "9eef3295787aab3582954d7b8fbdeda90de062f43e09a0a53d839c601c00fe0f" } ] diff --git a/src/database/modules/vector/VectorManager.ts b/src/database/modules/vector/VectorManager.ts index c79aba9..fa5cce5 100644 --- a/src/database/modules/vector/VectorManager.ts +++ b/src/database/modules/vector/VectorManager.ts @@ -1,7 +1,7 @@ import { backOff } from 'exponential-backoff' import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter' import { minimatch } from 'minimatch' -import { App, TFile } from 'obsidian' +import { App, Notice, TFile } from 'obsidian' import pLimit from 'p-limit' import { IndexProgress } from '../../../components/chat-view/QueryProgress' @@ -9,6 +9,7 @@ import { LLMAPIKeyInvalidException, LLMAPIKeyNotSetException, LLMBaseUrlNotSetException, + LLMRateLimitExceededException, } from '../../../core/llm/exception' import { InsertVector, SelectVector } from '../../../database/schema' import { EmbeddingModel } from '../../../types/embedding' @@ -174,13 +175,6 @@ export class VectorManager { startingDelay: 1000, timeMultiple: 1.5, jitter: 'full', - retry: (error) => { - console.error(error) - const isRateLimitError = - error.status === 429 && - error.message.toLowerCase().includes('rate limit') - return !!isRateLimitError // retry only for rate limit errors - }, }, ) } catch (error) { @@ -199,6 +193,8 @@ export class VectorManager { error instanceof LLMBaseUrlNotSetException ) { openSettingsModalWithError(this.app, (error as Error).message) + } else if (error instanceof LLMRateLimitExceededException) { + new Notice(error.message) } else { console.error('Error embedding chunks:', error) throw error diff --git a/src/database/schema.ts b/src/database/schema.ts index daca167..f8184ec 100644 --- a/src/database/schema.ts +++ b/src/database/schema.ts @@ -59,6 +59,7 @@ export const vectorTable1 = vectorTables[EMBEDDING_MODEL_OPTIONS[1].id] export const vectorTable2 = vectorTables[EMBEDDING_MODEL_OPTIONS[2].id] export const vectorTable3 = vectorTables[EMBEDDING_MODEL_OPTIONS[3].id] export const vectorTable4 = vectorTables[EMBEDDING_MODEL_OPTIONS[4].id] +export const vectorTable5 = vectorTables[EMBEDDING_MODEL_OPTIONS[5].id] /* Template Table */ export type TemplateContent = { diff --git a/src/types/embedding.ts b/src/types/embedding.ts index 045329e..1100117 100644 --- a/src/types/embedding.ts +++ b/src/types/embedding.ts @@ -3,6 +3,7 @@ import { LLMModel } from './llm/model' export type EmbeddingModelId = | 'openai/text-embedding-3-small' | 'openai/text-embedding-3-large' + | 'gemini/text-embedding-004' | 'ollama/nomic-embed-text' | 'ollama/mxbai-embed-large' | 'ollama/bge-m3'