From 79d610c3f24d807bcf37d134ae457daf4e8665f8 Mon Sep 17 00:00:00 2001 From: Marko Manninen Date: Tue, 7 May 2024 14:12:05 +0300 Subject: [PATCH 1/2] Added LocalEmbeddings, Ollama model and related examples with README updates. --- README.md | 133 ++++++++++++-------- examples/ollama/README.md | 92 ++++++++++++++ examples/ollama/package-lock.json | 195 +++++++++++++++++++++++++++++ examples/ollama/package.json | 18 +++ examples/ollama/requirements.txt | 5 + examples/ollama/server.py | 25 ++++ examples/ollama/src/index.ts | 31 +++++ examples/ollama/tsconfig.json | 104 +++++++++++++++ src/embeddings/local-embeddings.ts | 31 +++++ src/index.ts | 4 + src/models/ollama-model.ts | 60 +++++++++ 11 files changed, 648 insertions(+), 50 deletions(-) create mode 100644 examples/ollama/README.md create mode 100644 examples/ollama/package-lock.json create mode 100644 examples/ollama/package.json create mode 100644 examples/ollama/requirements.txt create mode 100644 examples/ollama/server.py create mode 100644 examples/ollama/src/index.ts create mode 100644 examples/ollama/tsconfig.json create mode 100644 src/embeddings/local-embeddings.ts create mode 100644 src/models/ollama-model.ts diff --git a/README.md b/README.md index 063eb255..09f7c4c1 100644 --- a/README.md +++ b/README.md @@ -47,56 +47,64 @@ The author(s) are looking to add core maintainers for this opensource project. R # Contents -- [Getting started](#getting-started) - - [Installation](#installation) - - [Usage](#usage) - - [Temperature](#temperature) - - [Search results count](#search-results-count) - - [Customize the prompt](#customize-the-prompt) - - [Dry run](#get-context) -- [Loaders supported](#loaders-supported) - - [PDF](#pdf-file) - - [Youtube](#youtube-video) - - [Youtube channels](#youtube-channel) - - [Youtube search](#youtube-search) - - [Web page](#web-page) - - [Confluence](#confluence) - - [Sitemap](#sitemap) - - [Text](#text) - - [Custom loader](#add-a-custom-loader) - - [How to request more loaders](#more-loaders-coming-soon) -- [LLMs](#llms) - - [OpenAI](#openai) - - [Azure OpenAI](#azure-openai) - - [Mistral](#mistral) - - [Hugging Face](#hugging-face) - - [Anthropic](#anthropic) - - [Bring your own LLMs](#use-custom-llm-model) - - [Request support for new LLMs](#more-llms-coming-soon) -- [Embedding Models](#embedding-models) - - [OpenAI v3 Small](#openai-v3-small) - - [OpenAI v3 Large](#openai-v3-large) - - [ADA](#ada) - - [Cohere](#cohere) - - [Custom embedding models](#use-custom-embedding-model) - - [Request support for embedding models](#more-embedding-models-coming-soon) -- [Vector databases supported](#vector-databases-supported) - - [Pinecone](#pinecone) - - [LanceDB](#lancedb) - - [Chroma](#chroma) - - [HNSWLib](#hnswlib) - - [Weaviate](#weaviate) - - [Qdrant](#qdrant) - - [Own Database](#bring-your-own-database) - - [How to request new vector databases](#more-databases-coming-soon) -- [Caches](#caches) - - [Redis](#redis) - - [LMDB File](#lmdb) - - [In memory cache](#inmemory) - - [Custom cache implementation](#bring-your-own-cache) - - [How to request new cache providers](#more-caches-coming-soon) -- [Sample projects](#sample-projects) -- [Contributors](#contributors) +- [EmbedJs](#embedjs) + - [Features](#features) + - [Quick note](#quick-note) +- [Contents](#contents) +- [Getting started](#getting-started) + - [Installation](#installation) + - [Usage](#usage) + - [Temperature](#temperature) + - [Search results count](#search-results-count) + - [Customize the prompt](#customize-the-prompt) + - [Get context 
(dry run)](#get-context-dry-run)
+    - [Get count of embedded chunks](#get-count-of-embedded-chunks)
+- [Loaders supported](#loaders-supported)
+  - [Youtube video](#youtube-video)
+  - [Youtube channel](#youtube-channel)
+  - [Youtube search](#youtube-search)
+  - [PDF file](#pdf-file)
+  - [Web page](#web-page)
+  - [Confluence](#confluence)
+  - [Sitemap](#sitemap)
+  - [Text](#text)
+  - [Add a custom loader](#add-a-custom-loader)
+  - [More loaders coming soon](#more-loaders-coming-soon)
+- [LLMs](#llms)
+  - [OpenAI](#openai)
+  - [Azure OpenAI](#azure-openai)
+  - [Mistral](#mistral)
+  - [Hugging Face](#hugging-face)
+  - [Anthropic](#anthropic)
+  - [Ollama](#ollama)
+  - [Use custom LLM model](#use-custom-llm-model)
+  - [More LLMs coming soon](#more-llms-coming-soon)
+- [Embedding models](#embedding-models)
+  - [OpenAI v3 Small](#openai-v3-small)
+  - [OpenAI v3 Large](#openai-v3-large)
+  - [Ada](#ada)
+  - [Cohere](#cohere)
+  - [Local embeddings](#local-embeddings)
+  - [Use custom embedding model](#use-custom-embedding-model)
+  - [More embedding models coming soon](#more-embedding-models-coming-soon)
+- [Vector databases supported](#vector-databases-supported)
+  - [Pinecone](#pinecone)
+  - [LanceDB](#lancedb)
+  - [Chroma](#chroma)
+  - [HNSWLib](#hnswlib)
+  - [Weaviate](#weaviate)
+  - [Qdrant](#qdrant)
+  - [Bring your own database](#bring-your-own-database)
+  - [More databases coming soon](#more-databases-coming-soon)
+- [Caches](#caches)
+  - [LMDB](#lmdb)
+  - [InMemory](#inmemory)
+  - [Redis](#redis)
+  - [Bring your own cache](#bring-your-own-cache)
+  - [More caches coming soon](#more-caches-coming-soon)
+- [Langsmith Integration](#langsmith-integration)
+- [Sample projects](#sample-projects)
+- [Contributors](#contributors)
 
 # Getting started
 
@@ -452,6 +460,18 @@ const ragApplication = await new RAGApplicationBuilder()
 
 You can read more about the various models provided by Anthropic [here](https://docs.anthropic.com/claude/docs/models-overview).
 
+## Ollama
+
+Locally running Ollama models are now supported. Installation instructions can be found at [https://ollama.com/](https://ollama.com/). Before first use, execute `ollama run <model_name>` once and pass that model to the `Ollama` constructor as `modelName`. By default Ollama listens on port '11434'; if for some reason you use something else, you can also pass a custom `baseUrl` to the constructor:
+
+```TS
+const ragApplication = await new RAGApplicationBuilder()
+.setModel(new Ollama({
+    modelName: "llama3",
+    baseUrl: 'http://localhost:11434'
+}))
+```
+
 ## Use custom LLM model
 
 You can use a custom LLM model by implementing the `BaseModel` interface. Here's how that would look like -
@@ -546,6 +566,19 @@ await new RAGApplicationBuilder()
 .setEmbeddingModel(new CohereEmbeddings())
 ```
 
+## Local embeddings
+
+Run a local server that exposes an embed API endpoint: it accepts 'texts' as a POST argument, transforms the values into vector representations, and returns them as a JSON list. The server may, for instance, use the Sentence Transformers 'all-MiniLM-L6-v2' model. The server address (including port) and the model's embedding dimensions (384 for 'all-MiniLM-L6-v2') must be provided in the `LocalEmbeddings` constructor.
+
+```TS
+import { LocalEmbeddings } from '@llm-tools/embedjs';
+
+await new RAGApplicationBuilder()
+.setEmbeddingModel(new LocalEmbeddings("http://localhost:5000/embed", 384))
+```
+
+See `examples/ollama` for a complete example.
+
 ## Use custom embedding model
 
 You can use your own custom embedding model by implementing the `BaseEmbeddings` interface.
Here's how that would look like -
diff --git a/examples/ollama/README.md b/examples/ollama/README.md
new file mode 100644
index 00000000..451d916e
--- /dev/null
+++ b/examples/ollama/README.md
@@ -0,0 +1,92 @@
+## Requirements
+
+This example consists of a Python Flask application that handles text embeddings and a Node.js application that uses these embeddings with the `embedJs` RAG library.
+
+The main emphasis is on open-source tooling and running the RAG application locally.
+
+
+### Install NodeJS dependencies
+
+```bash
+npm install
+```
+
+**WSL note**
+
+After reinstalling the dependencies, force a rebuild of all native modules to make sure they are compatible with your Linux environment under WSL:
+
+```bash
+npm rebuild --update-binary
+```
+
+### Install Python dependencies
+
+To run the vector embedding server with models supported by `SentenceTransformer`:
+
+```bash
+pip install -r requirements.txt
+```
+
+Be prepared to upgrade some libraries, such as huggingface_hub:
+
+```bash
+pip3 install sentence_transformers --upgrade
+```
+
+### Usage
+
+To run the full application (both the Flask and Node.js apps), execute the following commands.
+
+To start the server with the default parameters:
+
+```bash
+python server.py
+```
+
+#### Configurations
+
+Windows:
+
+```bash
+$env:FLASK_RUN_PORT="5000"; python server.py --model "all-MiniLM-L6-v2" --port 5000
+```
+
+Linux/Mac:
+
+```bash
+FLASK_RUN_PORT=5000 python server.py --model "all-MiniLM-L6-v2" --port 5000 &
+```
+
+The line above starts the embedding server as a background process; it must be stopped manually after running the example. Find its process ID:
+
+```bash
+$ sudo lsof -i :5000
+```
+
+and then kill it:
+
+```bash
+$ sudo kill <PID>
+```
+
+### Tesla example
+
+You must have Ollama installed ([https://ollama.com/](https://ollama.com/)) and have run at least once:
+
+```bash
+ollama run llama3
+```
+
+Run the simple "Tesla text" retrieval example with the default parameters:
+
+```bash
+npm start
+```
+
+#### Configurations
+
+```bash
+npm start -- "llama3" "http://localhost:5000/embed" 384
+```
+
+That will output similarity search results interpreted by the local Ollama llama3 LLM after the content has first been retrieved from the internet and indexed into the in-memory vector database.
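The embed contract that ties this README to `server.py` is small: POST a JSON body of the form `{ "texts": [...] }` and receive a JSON array of embedding vectors back. A minimal sketch for smoke-testing the embedding server before running the example, assuming the default `http://localhost:5000/embed` endpoint and model from `server.py`, and Node 18+ for the built-in `fetch`:

```TS
// Smoke test for the local embedding server: POST { texts: [...] }, expect number[][] back.
const response = await fetch('http://localhost:5000/embed', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ texts: ['Tesla builds electric cars.'] }),
});
const vectors: number[][] = await response.json();
console.log(vectors.length, vectors[0].length); // 1 embedding, 384 dimensions for all-MiniLM-L6-v2
```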
diff --git a/examples/ollama/package-lock.json b/examples/ollama/package-lock.json new file mode 100644 index 00000000..aeaf197e --- /dev/null +++ b/examples/ollama/package-lock.json @@ -0,0 +1,195 @@ +{ + "name": "ollama", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ollama", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "ts-node": "^10.9.2" + }, + "devDependencies": { + "@types/node": "^20.11.24" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", + "integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==" + }, + "node_modules/@types/node": { + "version": "20.12.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.10.tgz", + "integrity": "sha512-Eem5pH9pmWBHoGAT8Dr5fdc5rYA+4NAovdM4EktRPVAAiJhmWWfQrA0cFhAbOsQdSfIHjAud6YdkbL69+zSKjw==", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/acorn": { + "version": "8.11.3", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", + "integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + 
"node_modules/acorn-walk": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.2.tgz", + "integrity": "sha512-cjkyv4OtNCIeqhHrfS81QWXoCBPExR/J62oyEqepVw8WaQeSqpW2uhuLPh1m9eWhDuOo/jUXVTlifvesOWp/4A==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==" + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==" + }, + "node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==" + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/typescript": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", + "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "peer": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": 
"sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "engines": { + "node": ">=6" + } + } + } +} diff --git a/examples/ollama/package.json b/examples/ollama/package.json new file mode 100644 index 00000000..6b8b4077 --- /dev/null +++ b/examples/ollama/package.json @@ -0,0 +1,18 @@ +{ + "name": "ollama", + "version": "1.0.0", + "type": "module", + "private": true, + "scripts": { + "start": "tsc && node dist/examples/ollama/src/index.js", + "start-all": "tsc && ./start-all.sh" + }, + "author": "", + "license": "ISC", + "dependencies": { + "ts-node": "^10.9.2" + }, + "devDependencies": { + "@types/node": "^20.11.24" + } +} diff --git a/examples/ollama/requirements.txt b/examples/ollama/requirements.txt new file mode 100644 index 00000000..1fd16b66 --- /dev/null +++ b/examples/ollama/requirements.txt @@ -0,0 +1,5 @@ +Flask==2.0.1 +Werkzeug==2.0.1 +huggingface_hub>=0.8.0 +sentence_transformers>=2.2.0 +Jinja2>=3.0 \ No newline at end of file diff --git a/examples/ollama/server.py b/examples/ollama/server.py new file mode 100644 index 00000000..71e3208a --- /dev/null +++ b/examples/ollama/server.py @@ -0,0 +1,25 @@ +from flask import Flask, request, jsonify +from sentence_transformers import SentenceTransformer +import argparse + +app = Flask(__name__) + +# Set up command-line argument parsing +parser = argparse.ArgumentParser(description='Run a Flask server for embedding texts with Sentence Transformers.') +parser.add_argument('--model', type=str, default='all-MiniLM-L6-v2', + help='Model name for Sentence Transformers (default: all-MiniLM-L6-v2)') +parser.add_argument('--port', type=int, default=5000, + help='Port number for the Flask server (default: 5000)') +args = parser.parse_args() + +# Initialize the model based on the command-line argument +model = SentenceTransformer(args.model) + +@app.route('/embed', methods=['POST']) +def embed(): + texts = request.json['texts'] + embeddings = model.encode(texts, convert_to_tensor=False, convert_to_numpy=True) + return jsonify(embeddings.tolist()) + +if __name__ == '__main__': + app.run(port=args.port) diff --git a/examples/ollama/src/index.ts b/examples/ollama/src/index.ts new file mode 100644 index 00000000..d2e01780 --- /dev/null +++ b/examples/ollama/src/index.ts @@ -0,0 +1,31 @@ +import { RAGApplicationBuilder, WebLoader, YoutubeLoader, SitemapLoader, Ollama, LocalEmbeddings } from '../../../src/index.js'; +import { HNSWDb } from '../../../src/vectorDb/hnswlib-db.js'; + +const modelName = process.argv[2] || 'llama3'; +const serverUrl = process.argv[3] || 'http://localhost:5000/embed'; +const dimensions = parseInt(process.argv[4], 10) || 384; + +const llmApplication = await new RAGApplicationBuilder() + .setEmbeddingModel(new LocalEmbeddings( serverUrl, dimensions )) + .setModel(new Ollama({ + modelName: modelName, + baseUrl: 'http://localhost:11434' + })) + .setSearchResultCount(30) + .setVectorDb(new HNSWDb()) + .build(); + +llmApplication.addLoader(new YoutubeLoader({ videoIdOrUrl: 'pQiT2U5E9tI' })); +llmApplication.addLoader(new SitemapLoader({ url: 'https://tesla-info.com/sitemap.xml' })); +llmApplication.addLoader(new WebLoader({ url: 'https://en.wikipedia.org/wiki/Tesla,_Inc.' })); + +console.log((await llmApplication.query('Who founded Tesla?')).result); +// The founder of Tesla is Elon Musk. He co-founded the company with JB Straubel, Martin Eberhard, Marc Tarpenning, and Ian Wright in 2003. Elon Musk is also the CEO of SpaceX and Neuralink. 
+ +console.log((await llmApplication.query('Tell me about the history of Tesla?')).result); +// Tesla, Inc. was founded in 2003 by Martin Eberhard and Marc Tarpenning with the goal of creating electric vehicles that could compete with traditional gasoline-powered cars. Elon Musk led the company's Series A financing round in February 2004, and has since played a significant role in the company's development. +// The company's first vehicle, the Tesla Roadster, was released in 2008. It was the first highway-legal all-electric vehicle to use lithium-ion battery cells, and could travel 245 miles (394 km) on a single charge. The Roadster was followed by the Model S, a full-sized luxury sedan, in 2012. The Model S was the world's best-selling plug-in electric vehicle in 2015 and 2016. +// In 2015, Tesla released the Model X, a mid-size luxury SUV, and in 2017, it began production of the Model 3, a four-door sedan aimed at the mass market. The Model 3 became the world's best-selling electric vehicle in 2018. Tesla also produces the Tesla Semi, a Class 8 semi-truck, and the Tesla Cybertruck, a full + +console.log((await llmApplication.query('What cars does Tesla have')).result); +// Tesla currently offers six vehicle models: Model S, Model X, Model 3, Model Y, Tesla Semi, and Cybertruck. The first-generation Tesla Roadster is no longer sold, but Tesla has plans for a second-generation Roadster. Tesla also has joint projects with Mercedes, Toyota, and Smart. diff --git a/examples/ollama/tsconfig.json b/examples/ollama/tsconfig.json new file mode 100644 index 00000000..b8c66cc0 --- /dev/null +++ b/examples/ollama/tsconfig.json @@ -0,0 +1,104 @@ +{ + "compilerOptions": { + /* Projects */ + // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ + // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ + // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ + // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ + // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ + // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + + /* Language and Environment */ + "target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, + // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ + // "jsx": "preserve", /* Specify what JSX code is generated. */ + "experimentalDecorators": true /* Enable experimental support for TC39 stage 2 draft decorators. */, + "emitDecoratorMetadata": true /* Emit design-type metadata for decorated declarations in source files. */, + // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ + // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ + // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ + // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. 
*/
+    // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
+    // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
+    // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
+
+    /* Modules */
+    "module": "nodenext" /* Specify what module code is generated. */,
+    // "rootDir": "./", /* Specify the root folder within your source files. */
+    // "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
+    // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
+    // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
+    // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
+    // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
+    // "types": [], /* Specify type package names to be included without being referenced in a source file. */
+    // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
+    // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
+    // "resolveJsonModule": true, /* Enable importing .json files. */
+    // "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
+
+    /* JavaScript Support */
+    // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
+    // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
+    // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
+
+    /* Emit */
+    "declaration": true /* Generate .d.ts files from TypeScript and JavaScript files in your project. */,
+    // "declarationMap": true, /* Create sourcemaps for d.ts files. */
+    // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
+    // "sourceMap": true, /* Create source map files for emitted JavaScript files. */
+    // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
+    "outDir": "./dist" /* Specify an output folder for all emitted files. */,
+    "removeComments": true /* Disable emitting comments. */,
+    // "noEmit": true, /* Disable emitting files from a compilation. */
+    // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
+    // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
+    // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
+    // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
+    // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
+    // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
+    // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
+    // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
+    // "newLine": "crlf", /* Set the newline character for emitting files. 
*/ + // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ + // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ + // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ + // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ + // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ + // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ + + /* Interop Constraints */ + // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ + // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ + "esModuleInterop": true /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */, + // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ + "forceConsistentCasingInFileNames": true /* Ensure that casing is correct in imports. */, + + /* Type Checking */ + "strict": false /* Enable all strict type-checking options. */, + // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ + // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ + // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ + // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ + // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ + // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ + // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ + // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ + // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ + // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ + // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ + // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ + // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ + // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ + // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ + // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ + // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ + + /* Completeness */ + // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ + "skipLibCheck": true /* Skip type checking all .d.ts files. 
*/,
+        "types": ["node"]
+    },
+    "include": ["src/**/*", "../../src/**/*"]
+}
+
\ No newline at end of file
diff --git a/src/embeddings/local-embeddings.ts b/src/embeddings/local-embeddings.ts
new file mode 100644
index 00000000..ecadf656
--- /dev/null
+++ b/src/embeddings/local-embeddings.ts
@@ -0,0 +1,31 @@
+import { BaseEmbeddings } from '../interfaces/base-embeddings.js';
+import axios from 'axios';
+
+export class LocalEmbeddings implements BaseEmbeddings {
+    private serverUrl: string;
+    private dimensions: number;
+
+    // Dimensions for 'all-MiniLM-L6-v2' are 384
+    constructor(serverUrl: string = 'http://localhost:5000/embed', dimensions: number = 384) {
+        this.serverUrl = serverUrl;
+        this.dimensions = dimensions;
+    }
+
+    getDimensions(): number {
+        return this.dimensions;
+    }
+
+    async embedDocuments(texts: string[]): Promise<number[][]> {
+        try {
+            const response = await axios.post(this.serverUrl, { texts });
+            return response.data;
+        } catch (error) {
+            console.error('Error embedding documents:', error);
+            throw error;
+        }
+    }
+
+    async embedQuery(text: string): Promise<number[]> {
+        return this.embedDocuments([text]).then(res => res[0]);
+    }
+}
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
index dfa92643..bb9ba6ce 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -20,9 +20,11 @@ import { AdaEmbeddings } from './embeddings/ada-embeddings.js';
 import { CohereEmbeddings } from './embeddings/cohere-embeddings.js';
 import { OpenAi3LargeEmbeddings } from './embeddings/openai-3large-embeddings.js';
 import { OpenAi3SmallEmbeddings } from './embeddings/openai-3small-embeddings.js';
+import { LocalEmbeddings } from './embeddings/local-embeddings.js';
 import { Mistral } from './models/mistral-model.js';
 import { HuggingFace } from './models/huggingface-model.js';
 import { Anthropic } from './models/anthropic-model.js';
+import { Ollama } from './models/ollama-model.js';
 
 export {
     RAGApplication,
@@ -47,7 +49,9 @@
     CohereEmbeddings,
     OpenAi3LargeEmbeddings,
     OpenAi3SmallEmbeddings,
+    LocalEmbeddings,
     Mistral,
     HuggingFace,
     Anthropic,
+    Ollama,
 };
diff --git a/src/models/ollama-model.ts b/src/models/ollama-model.ts
new file mode 100644
index 00000000..b1a41b71
--- /dev/null
+++ b/src/models/ollama-model.ts
@@ -0,0 +1,60 @@
+import createDebugMessages from 'debug';
+import { Ollama as ChatOllamaAI } from '@langchain/community/llms/ollama';
+import { AIMessage, HumanMessage, SystemMessage } from '@langchain/core/messages';
+
+import { Chunk, ConversationHistory } from '../global/types.js';
+import { BaseModel } from '../interfaces/base-model.js';
+
+export class Ollama extends BaseModel {
+    private readonly debug = createDebugMessages('embedjs:model:Ollama');
+    private model: ChatOllamaAI;
+
+    constructor({
+        baseUrl,
+        temperature,
+        modelName
+    }: {
+        baseUrl?: string;
+        temperature?: number;
+        modelName?: string;
+    }) {
+        super(temperature);
+        this.model = new ChatOllamaAI(
+            {
+                model: modelName ?? 'llama2',
+                baseUrl: baseUrl ?? 
"http://localhost:11434" + } + ); + } + + override async runQuery( + system: string, + userQuery: string, + supportingContext: Chunk[], + pastConversations: ConversationHistory[], + ): Promise { + const pastMessages: (AIMessage | SystemMessage | HumanMessage)[] = [new SystemMessage(system)]; + pastMessages.push( + new SystemMessage(`Supporting context: ${supportingContext.map((s) => s.pageContent).join('; ')}`), + ); + + pastMessages.push.apply( + pastConversations.map((c) => { + if (c.sender === 'AI') + return new AIMessage({ + content: c.message, + }); + + return new HumanMessage({ + content: c.message, + }); + }), + ); + pastMessages.push(new HumanMessage(`${userQuery}?`)); + + this.debug(`Executing ollama model ${this.model} with prompt -`, userQuery); + const result = await this.model.invoke(pastMessages); + this.debug('Ollama response -', result); + return result.toString(); + } +} From 5e5e1a22550d96a43c4639126dc76e8785096d89 Mon Sep 17 00:00:00 2001 From: Marko Manninen Date: Fri, 10 May 2024 01:38:01 +0300 Subject: [PATCH 2/2] Removed local embeddings and left Ollama model for new PR. --- README.md | 14 ------ examples/ollama/README.md | 73 ++---------------------------- examples/ollama/package.json | 3 +- examples/ollama/requirements.txt | 5 -- examples/ollama/server.py | 25 ---------- examples/ollama/src/index.ts | 6 +-- src/embeddings/local-embeddings.ts | 31 ------------- src/index.ts | 2 - 8 files changed, 7 insertions(+), 152 deletions(-) delete mode 100644 examples/ollama/requirements.txt delete mode 100644 examples/ollama/server.py delete mode 100644 src/embeddings/local-embeddings.ts diff --git a/README.md b/README.md index c1e538f0..38ede9ed 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,6 @@ The author(s) are looking to add core maintainers for this opensource project. R - [OpenAI v3 Large](#openai-v3-large) - [Ada](#ada) - [Cohere](#cohere) - - [Local embeddings](#local-embeddings) - [Use custom embedding model](#use-custom-embedding-model) - [More embedding models coming soon](#more-embedding-models-coming-soon) - [Vector databases supported](#vector-databases-supported) @@ -566,19 +565,6 @@ await new RAGApplicationBuilder() .setEmbeddingModel(new CohereEmbeddings()) ``` -## Local embeddings - -Run a local server with embed API endpoint that takes 'texts' as a POST action argument, transforms value to a vector representation, and returns a JSON list. Server may utilize Sentence Transformers 'all-MiniLM-L6-v2' model, for instance. The server address with a port and a model parameter count ('384' for 'all-MiniLM-L6-v2') must be provided in the `LocalEmbeddings` constructor. - -```TS -import { LocalEmbeddings } from '@llm-tools/embedjs'; - -await new RAGApplicationBuilder() -.setEmbeddingModel(new LocalEmbeddings("http://localhost:5000/embed", 384)) -``` - -See `examples/ollama` for a complete example. - ## Use custom embedding model You can use your own custom embedding model by implementing the `BaseEmbeddings` interface. Here's how that would look like - diff --git a/examples/ollama/README.md b/examples/ollama/README.md index 451d916e..e76c1332 100644 --- a/examples/ollama/README.md +++ b/examples/ollama/README.md @@ -1,9 +1,8 @@ ## Requirements -This example consists of a Python Flask application that handles text embeddings and a Node.js application that uses these embeddings with `embedJs` RAG library. - -Main emphasis is on open-source and local running of the RAG application. 
+This example consists of a Node.js application that uses vector embeddings with the `embedJs` RAG library to store text from various sources in a database, retrieve it with similarity search, and interpret it with an Ollama LLM.
+The main motivation is to run the RAG application locally using open-source components.
 
 ### Install NodeJS dependencies
 
@@ -11,64 +10,6 @@ The main emphasis is on open-source tooling and running the RAG application locally.
 npm install
 ```
 
-**WSL note**
-
-After reinstalling the dependencies, force a rebuild of all native modules to make sure they are compatible with your Linux environment under WSL:
-
-```bash
-npm rebuild --update-binary
-```
-
-### Install Python dependencies
-
-To run the vector embedding server with models supported by `SentenceTransformer`:
-
-```bash
-pip install -r requirements.txt
-```
-
-Be prepared to upgrade some libraries, such as huggingface_hub:
-
-```bash
-pip3 install sentence_transformers --upgrade
-```
-
-### Usage
-
-To run the full application (both the Flask and Node.js apps), execute the following commands.
-
-To start the server with the default parameters:
-
-```bash
-python server.py
-```
-
-#### Configurations
-
-Windows:
-
-```bash
-$env:FLASK_RUN_PORT="5000"; python server.py --model "all-MiniLM-L6-v2" --port 5000
-```
-
-Linux/Mac:
-
-```bash
-FLASK_RUN_PORT=5000 python server.py --model "all-MiniLM-L6-v2" --port 5000 &
-```
-
-The line above starts the embedding server as a background process; it must be stopped manually after running the example. Find its process ID:
-
-```bash
-$ sudo lsof -i :5000
-```
-
-and then kill it:
-
-```bash
-$ sudo kill <PID>
-```
-
 ### Tesla example
 
 You must have Ollama installed ([https://ollama.com/](https://ollama.com/)) and have run at least once:
 
@@ -80,13 +21,7 @@ ollama run llama3
 ```
 
 Run the simple "Tesla text" retrieval example with the default parameters:
 
 ```bash
-npm start
-```
-
-#### Configurations
-
-```bash
-npm start -- "llama3" "http://localhost:5000/embed" 384
+npm start -- llama3
 ```
 
 That will output similarity search results interpreted by the local Ollama llama3 LLM after the content has first been retrieved from the internet and indexed into the in-memory vector database.
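With the example trimmed down, the only local service it talks to is Ollama, whose endpoint stays hardcoded in `examples/ollama/src/index.ts`. If Ollama listens somewhere other than the default `http://localhost:11434`, the builder call can be adjusted; a minimal sketch, using a hypothetical remote host for illustration and otherwise mirroring the example app:

```TS
import { RAGApplicationBuilder, Ollama, AdaEmbeddings } from '../../../src/index.js';
import { HNSWDb } from '../../../src/vectorDb/hnswlib-db.js';

// Hypothetical host and port; everything else mirrors the example app.
const app = await new RAGApplicationBuilder()
    .setEmbeddingModel(new AdaEmbeddings())
    .setModel(new Ollama({ modelName: 'llama3', baseUrl: 'http://192.168.1.20:11434' }))
    .setVectorDb(new HNSWDb())
    .build();
```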
diff --git a/examples/ollama/package.json b/examples/ollama/package.json index 6b8b4077..f8e0d387 100644 --- a/examples/ollama/package.json +++ b/examples/ollama/package.json @@ -4,8 +4,7 @@ "type": "module", "private": true, "scripts": { - "start": "tsc && node dist/examples/ollama/src/index.js", - "start-all": "tsc && ./start-all.sh" + "start": "tsc && node dist/examples/ollama/src/index.js" }, "author": "", "license": "ISC", diff --git a/examples/ollama/requirements.txt b/examples/ollama/requirements.txt deleted file mode 100644 index 1fd16b66..00000000 --- a/examples/ollama/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -Flask==2.0.1 -Werkzeug==2.0.1 -huggingface_hub>=0.8.0 -sentence_transformers>=2.2.0 -Jinja2>=3.0 \ No newline at end of file diff --git a/examples/ollama/server.py b/examples/ollama/server.py deleted file mode 100644 index 71e3208a..00000000 --- a/examples/ollama/server.py +++ /dev/null @@ -1,25 +0,0 @@ -from flask import Flask, request, jsonify -from sentence_transformers import SentenceTransformer -import argparse - -app = Flask(__name__) - -# Set up command-line argument parsing -parser = argparse.ArgumentParser(description='Run a Flask server for embedding texts with Sentence Transformers.') -parser.add_argument('--model', type=str, default='all-MiniLM-L6-v2', - help='Model name for Sentence Transformers (default: all-MiniLM-L6-v2)') -parser.add_argument('--port', type=int, default=5000, - help='Port number for the Flask server (default: 5000)') -args = parser.parse_args() - -# Initialize the model based on the command-line argument -model = SentenceTransformer(args.model) - -@app.route('/embed', methods=['POST']) -def embed(): - texts = request.json['texts'] - embeddings = model.encode(texts, convert_to_tensor=False, convert_to_numpy=True) - return jsonify(embeddings.tolist()) - -if __name__ == '__main__': - app.run(port=args.port) diff --git a/examples/ollama/src/index.ts b/examples/ollama/src/index.ts index d2e01780..12c06efc 100644 --- a/examples/ollama/src/index.ts +++ b/examples/ollama/src/index.ts @@ -1,12 +1,10 @@ -import { RAGApplicationBuilder, WebLoader, YoutubeLoader, SitemapLoader, Ollama, LocalEmbeddings } from '../../../src/index.js'; +import { RAGApplicationBuilder, WebLoader, YoutubeLoader, SitemapLoader, Ollama, AdaEmbeddings } from '../../../src/index.js'; import { HNSWDb } from '../../../src/vectorDb/hnswlib-db.js'; const modelName = process.argv[2] || 'llama3'; -const serverUrl = process.argv[3] || 'http://localhost:5000/embed'; -const dimensions = parseInt(process.argv[4], 10) || 384; const llmApplication = await new RAGApplicationBuilder() - .setEmbeddingModel(new LocalEmbeddings( serverUrl, dimensions )) + .setEmbeddingModel(new AdaEmbeddings()) .setModel(new Ollama({ modelName: modelName, baseUrl: 'http://localhost:11434' diff --git a/src/embeddings/local-embeddings.ts b/src/embeddings/local-embeddings.ts deleted file mode 100644 index ecadf656..00000000 --- a/src/embeddings/local-embeddings.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { BaseEmbeddings } from '../interfaces/base-embeddings.js'; -import axios from 'axios'; - -export class LocalEmbeddings implements BaseEmbeddings { - private serverUrl: string; - private dimensions: number; - - // Dimensions for 'all-MiniLM-L6-v2' are 384 - constructor(serverUrl: string = 'http://localhost:5000/embed', dimensions: number = 384) { - this.serverUrl = serverUrl; - this.dimensions = dimensions; - } - - getDimensions(): number { - return this.dimensions; - } - - async embedDocuments(texts: string[]): 
Promise<number[][]> {
-        try {
-            const response = await axios.post(this.serverUrl, { texts });
-            return response.data;
-        } catch (error) {
-            console.error('Error embedding documents:', error);
-            throw error;
-        }
-    }
-
-    async embedQuery(text: string): Promise<number[]> {
-        return this.embedDocuments([text]).then(res => res[0]);
-    }
-}
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
index bb9ba6ce..938f4990 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -20,7 +20,6 @@ import { AdaEmbeddings } from './embeddings/ada-embeddings.js';
 import { CohereEmbeddings } from './embeddings/cohere-embeddings.js';
 import { OpenAi3LargeEmbeddings } from './embeddings/openai-3large-embeddings.js';
 import { OpenAi3SmallEmbeddings } from './embeddings/openai-3small-embeddings.js';
-import { LocalEmbeddings } from './embeddings/local-embeddings.js';
 import { Mistral } from './models/mistral-model.js';
 import { HuggingFace } from './models/huggingface-model.js';
 import { Anthropic } from './models/anthropic-model.js';
@@ -49,7 +48,6 @@ export {
     CohereEmbeddings,
     OpenAi3LargeEmbeddings,
     OpenAi3SmallEmbeddings,
-    LocalEmbeddings,
     Mistral,
     HuggingFace,
     Anthropic,