Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move BaseEmbeddings to abstract class #181

Merged
merged 3 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/embedjs-interfaces/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@llm-tools/embedjs-interfaces",
"version": "0.1.22",
"version": "0.1.23",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"@langchain/core": "^0.3.19",
Expand Down
11 changes: 7 additions & 4 deletions core/embedjs-interfaces/src/interfaces/base-embeddings.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
export interface BaseEmbeddings {
embedDocuments(texts: string[]): Promise<number[][]>;
embedQuery(text: string): Promise<number[]>;
getDimensions(): Promise<number>;
/**
 * Abstract base class for embedding model integrations.
 *
 * Subclasses must implement the three abstract methods below. `init` is an
 * optional asynchronous setup hook that does nothing unless overridden.
 */
export abstract class BaseEmbeddings {
/**
 * Optional asynchronous setup hook. The default implementation is empty;
 * subclasses may override it to perform async work before the model is used.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-function
public async init(): Promise<void> {}

/**
 * Embeds several texts in one call.
 *
 * @param texts - The texts to embed.
 * @returns An array of numeric vectors (presumably one per input text, in
 * input order; confirm against concrete implementations).
 */
public abstract embedDocuments(texts: string[]): Promise<number[][]>;
/**
 * Embeds a single text.
 *
 * @param text - The text to embed.
 * @returns The numeric vector for `text`.
 */
public abstract embedQuery(text: string): Promise<number[]>;
/**
 * @returns A number (presumably the length of the vectors this model
 * produces; confirm against concrete implementations).
 */
public abstract getDimensions(): Promise<number>;
}
4 changes: 2 additions & 2 deletions core/embedjs-utils/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-utils",
"version": "0.1.22",
"version": "0.1.23",
"description": "Useful util functions when extending the embedjs ecosystem",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22"
"@llm-tools/embedjs-interfaces": "0.1.23"
},
"type": "module",
"main": "./src/index.js",
Expand Down
6 changes: 3 additions & 3 deletions core/embedjs/package.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "module",
"name": "@llm-tools/embedjs",
"version": "0.1.22",
"version": "0.1.23",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-utils": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-utils": "0.1.23",
"debug": "^4.3.7",
"langchain": "^0.3.6",
"md5": "^2.3.0",
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs/src/core/rag-application.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ export class RAGApplication {

this.searchResultCount = llmBuilder.getSearchResultCount();
this.embeddingRelevanceCutOff = llmBuilder.getEmbeddingRelevanceCutOff();

RAGEmbedding.init(llmBuilder.getEmbeddingModel());
}

/**
Expand All @@ -57,6 +55,8 @@ export class RAGApplication {
* LLM based on the configuration provided
*/
public async init(llmBuilder: RAGApplicationBuilder) {
await RAGEmbedding.init(llmBuilder.getEmbeddingModel());

this.model = await this.getModel(llmBuilder.getModel());
if (!this.model) this.debug('No base model set; query function unavailable!');
else BaseModel.setDefaultTemperature(llmBuilder.getTemperature());
Expand Down
3 changes: 2 additions & 1 deletion core/embedjs/src/core/rag-embedding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import { Document } from 'langchain/document';
export class RAGEmbedding {
private static singleton: RAGEmbedding;

public static init(embeddingModel: BaseEmbeddings) {
/**
 * Creates the shared RAGEmbedding instance on first use; later calls leave the
 * existing instance untouched.
 *
 * @param embeddingModel - Model whose `init()` is awaited before it is wrapped
 * in the singleton.
 */
public static async init(embeddingModel: BaseEmbeddings) {
    // A singleton already exists: nothing to initialise.
    if (this.singleton) return;

    await embeddingModel.init();
    this.singleton = new RAGEmbedding(embeddingModel);
}
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-astra/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-astradb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add AstraDB support to embedjs",
"dependencies": {
"@datastax/astra-db-ts": "^1.5.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-cosmos/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-cosmos",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add CosmosDB support to embedjs",
"dependencies": {
"@azure/cosmos": "^4.2.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-hnswlib/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-hnswlib",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add HNSWLib support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7",
"hnswlib-node": "^3.0.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lancedb/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-lancedb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add LanceDb support to embedjs",
"dependencies": {
"@lancedb/lancedb": "^0.13.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.3.7"
},
Expand Down
6 changes: 3 additions & 3 deletions databases/embedjs-libsql/package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "@llm-tools/embedjs-libsql",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add LibSQL support to embedjs",
"dependencies": {
"@libsql/client": "^0.14.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-utils": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-utils": "0.1.23",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
13 changes: 11 additions & 2 deletions databases/embedjs-libsql/src/libsql-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,17 @@ export class LibSqlStore implements BaseStore {
}

/**
 * Stores `value` under `key` in the loaders custom data table, replacing any
 * row previously stored under the same key.
 *
 * @param loaderId - Written to the `loaderId` column of the new row.
 * @param key - Written to the `key` column; any existing row with this key is deleted first.
 * @param value - Serialised with `JSON.stringify` into the `value` column.
 */
async loaderCustomSet<T extends Record<string, unknown>>(loaderId: string, key: string, value: T): Promise<void> {
    this.debug(`LibSQL custom set '${key}' with values`, value);
    // Delete-then-insert: clear any existing row for this key before writing the new one.
    await this.loaderCustomDelete(key);

    this.debug(`LibSQL custom set '${key}' insert started`);
    // All three values are bound parameters; only the table name is interpolated.
    const results = await this.client.execute({
        sql: `INSERT INTO ${this.loadersCustomDataTableName} (key, loaderId, value)
            VALUES (?, ?, ?)`,
        args: [key, loaderId, JSON.stringify(value)],
    });

    this.debug(`LibSQL custom set for key '${key}' resulted in`, results.rows);
}

async loaderCustomGet<T extends Record<string, unknown>>(key: string): Promise<T> {
Expand All @@ -144,10 +148,15 @@ export class LibSqlStore implements BaseStore {
}

/**
 * Deletes the custom loader data stored under `key`.
 *
 * The key is passed as a bound parameter rather than spliced into the SQL
 * text, so a key containing a quote cannot break or alter the statement.
 * The statement runs exactly once, so the logged `rowsAffected` reflects the
 * rows actually removed.
 *
 * @param key - Value matched against the `key` column of the custom data table.
 */
async loaderCustomDelete(key: string): Promise<void> {
    this.debug(`LibSQL custom delete '${key}'`);
    // The table name is an identifier and cannot be bound; it comes from this store's own configuration.
    const results = await this.client.execute({
        sql: `DELETE FROM ${this.loadersCustomDataTableName} WHERE key = ?;`,
        args: [key],
    });
    this.debug(`LibSQL custom delete for key '${key}' resulted in`, results.rowsAffected);
}

/**
 * Removes a loader's row from the loaders table and every custom data row
 * recorded for that loader.
 *
 * `loaderId` is passed as a bound parameter in both statements instead of
 * being interpolated into the SQL text, so ids containing quotes are handled
 * safely.
 *
 * @param loaderId - Matched against `id` in the loaders table and `loaderId`
 * in the custom data table.
 */
async deleteLoaderMetadataAndCustomValues(loaderId: string): Promise<void> {
    this.debug(`LibSQL deleteLoaderMetadataAndCustomValues for loader '${loaderId}'`);
    // Same order as before: loader row first, then its custom values.
    await this.client.execute({
        sql: `DELETE FROM ${this.loadersTableName} WHERE id = ?;`,
        args: [loaderId],
    });
    await this.client.execute({
        sql: `DELETE FROM ${this.loadersCustomDataTableName} WHERE loaderId = ?;`,
        args: [loaderId],
    });
}
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lmdb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-lmdb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add LMDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7",
"lmdb": "^3.1.6"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-mongodb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-mongodb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add MongoDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7",
"mongodb": "^6.11.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-pinecone/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-pinecone",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Pinecone support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@pinecone-database/pinecone": "^4.0.0",
"debug": "^4.3.7"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-qdrant/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-qdrant",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Qdrant support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@qdrant/js-client-rest": "^1.12.0",
"debug": "^4.3.7",
"uuid": "^11.0.3"
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-redis/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-redis",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Redis support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"ioredis": "^5.4.1"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-weaviate/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-weaviate",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Weaviate support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.3.7",
"weaviate-ts-client": "^2.2.0"
Expand Down
7 changes: 5 additions & 2 deletions examples/confluence/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import 'dotenv/config';
import path from 'node:path';
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
import { LibSqlDb, LibSqlStore } from '@llm-tools/embedjs-libsql';
import { OpenAi, OpenAiEmbeddings } from '@llm-tools/embedjs-openai';
import { ConfluenceLoader } from '@llm-tools/embedjs-loader-confluence';
import { HNSWDb } from '@llm-tools/embedjs-hnswlib';

const databasePath = path.resolve('./examples/confluence/data.db');
const llmApplication = await new RAGApplicationBuilder()
.setStore(new LibSqlStore({ path: databasePath }))
.setVectorDatabase(new LibSqlDb({ path: databasePath }))
.setModel(new OpenAi({ modelName: 'gpt-4o' }))
.setEmbeddingModel(new OpenAiEmbeddings())
.setVectorDatabase(new HNSWDb())
.build();

await llmApplication.addLoader(new ConfluenceLoader({ spaceNames: ['DEMO'] }));
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-confluence/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-confluence",
"version": "0.1.22",
"version": "0.1.23",
"description": "Confluence loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-loader-web": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-loader-web": "0.1.23",
"confluence.js": "^1.7.4",
"debug": "^4.3.7",
"md5": "^2.3.0"
Expand Down
33 changes: 21 additions & 12 deletions loaders/embedjs-loader-confluence/src/confluence-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {

private async *processSpace(spaceKey: string) {
this.debug('Processing space', spaceKey);

try {
const spaceContent = await this.confluence.space.getContentForSpace({ spaceKey });
this.debug(`Confluence space '${spaceKey}' has '${spaceContent['page'].results.length}' root pages`);

for (const { id } of spaceContent['page'].results) {
for await (const result of this.processPage(id)) {
for (const { id, title } of spaceContent['page'].results) {
for await (const result of this.processPage(id, title)) {
yield result;
}
}
Expand All @@ -78,8 +79,10 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
}
}

private async *processPage(pageId: string) {
private async *processPage(pageId: string, title: string) {
this.debug('Processing page', title);
let confluenceVersion = 0;

try {
const spaceProperties = await this.confluence.content.getContentById({
id: pageId,
Expand All @@ -89,29 +92,29 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
if (!spaceProperties.version.number) throw new Error('Version number not found in space properties...');
confluenceVersion = spaceProperties.version.number;
} catch (e) {
this.debug('Could not get page properties. Page will be SKIPPED!', pageId, e.response);
this.debug('Could not get page properties. Page will be SKIPPED!', title, e.response);
return;
}

let doProcess = false;
if (!(await this.checkInCache(pageId))) {
this.debug(`Processing '${pageId}' for the FIRST time...`);
this.debug(`Processing '${title}' for the FIRST time...`);
doProcess = true;
} else {
const cacheVersion = (await this.getFromCache(pageId)).version;
if (cacheVersion !== confluenceVersion) {
this.debug(
`For page '${pageId}' - version in cache is ${cacheVersion} and confluence version is ${confluenceVersion}. This page will be PROCESSED.`,
`For page '${title}' - version in cache is ${cacheVersion} and confluence version is ${confluenceVersion}. This page will be PROCESSED.`,
);
doProcess = true;
} else
this.debug(
`For page '${pageId}' - version in cache and confluence are the same ${confluenceVersion}. This page will be SKIPPED.`,
`For page '${title}' - version in cache and confluence are the same ${confluenceVersion}. This page will be SKIPPED.`,
);
}

if (!doProcess) {
this.debug(`Skipping page '${pageId}'`);
this.debug(`Skipping page '${title}'`);
return;
}

Expand All @@ -126,21 +129,27 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
return;
}

this.debug(`Processing content for page '${title}'...`);
for await (const result of this.getContentChunks(content.body.view.value, content._links.webui)) {
yield result;
}

await this.saveToCache(pageId, { version: confluenceVersion });

if (content.children) {
for (const { id } of content.children.page.results) {
for await (const result of this.processPage(id)) {
yield result;
for (const { id, title } of content.children.page.results) {
try {
for await (const result of this.processPage(id, title)) {
yield result;
}
} catch (e) {
this.debug(`Error! Could not process page child '${title}'`, pageId, e);
return;
}
}
}
} catch (e) {
this.debug('Error! Could not process page content or children', pageId, e);
this.debug('Error! Could not process page content', pageId, e);
return;
}
}
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-csv/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-csv",
"version": "0.1.22",
"version": "0.1.23",
"description": "CSV loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-utils": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-utils": "0.1.23",
"csv-parse": "^5.6.0",
"debug": "^4.3.7",
"md5": "^2.3.0"
Expand Down
Loading
Loading