Skip to content

Commit

Permalink
updated readme to include sitemap loader
Browse files Browse the repository at this point in the history
  • Loading branch information
adhityan committed Feb 2, 2024
1 parent d6755a1 commit 1a09703
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 62 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ The library also supports optional caching for embeddings and loaders. Chunks th
- [Youtube search](#youtube-search)
- [Web page](#web-page)
- [Confluence](#confluence)
- [Sitemap](#sitemap)
- [Text](#text)
- [Custom loader](#add-a-custom-loader)
- [How to request more loaders](#more-loaders-coming-soon)
Expand Down Expand Up @@ -264,8 +265,26 @@ To add a confluence space, use `ConfluenceLoader`.
.addLoader(new ConfluenceLoader({ spaceNames: ['...'] }))
```

You also need to set the following environment variables:

```bash
CONFLUENCE_BASE_URL=<your space base url>
CONFLUENCE_USER_NAME=<your email id or username>
CONFLUENCE_API_TOKEN=<your personal or bot access token>
```

**Note:** The Confluence space name is the value you see in the URL of the space overview page: `/wiki/spaces/{{ space name }}/overview`.

## Sitemap

To add an XML sitemap, use `SitemapLoader`.

```TS
.addLoader(new SitemapLoader({ url: '...' }))
```

This will load all URLs in a sitemap via the WebLoader.

## Text

To supply your own text, use `TextLoader`.
Expand Down
90 changes: 45 additions & 45 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 15 additions & 15 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@llm-tools/embedjs",
"version": "0.0.49",
"version": "0.0.50",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand Down Expand Up @@ -50,12 +50,12 @@
"dependencies": {
"@huggingface/inference": "^2.6.4",
"@langchain/cohere": "^0.0.3",
"@langchain/openai": "^0.0.13",
"@langchain/openai": "^0.0.14",
"axios": "^1.6.7",
"confluence.js": "^1.7.2",
"debug": "^4.3.4",
"html-to-text": "^9.0.5",
"langchain": "^0.1.11",
"langchain": "^0.1.12",
"md5": "^2.3.0",
"pdf-parse-fork": "^1.2.0",
"sitemapper": "^3.2.8",
Expand All @@ -70,12 +70,12 @@
"@types/debug": "^4.1.12",
"@types/html-to-text": "^9.0.4",
"@types/md5": "^2.3.5",
"@types/node": "^20.11.14",
"@types/node": "^20.11.16",
"@types/usetube": "^2.1.2",
"@typescript-eslint/eslint-plugin": "^6.20.0",
"@typescript-eslint/parser": "^6.20.0",
"chromadb": "^1.8.1",
"cohere-ai": "^7.7.4",
"cohere-ai": "^7.7.5",
"eslint": "^8.56.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.1.3",
Expand All @@ -85,20 +85,26 @@
"prettier": "^3.2.4",
"rimraf": "^5.0.5",
"typescript": "^5.3.3",
"vectordb": "^0.4.6",
"vectordb": "^0.4.7",
"weaviate-ts-client": "^2.0.0"
},
"peerDependencies": {
"@pinecone-database/pinecone": "^1.1.2",
"@qdrant/js-client-rest": "^1.7.0",
"chromadb": "^1.7.3",
"cohere-ai": "^7.6.2",
"chromadb": "^1.8.1",
"cohere-ai": "^7.7.5",
"hnswlib-node": "^2.1.0",
"ioredis": "^5.3.2",
"lmdb": "^2.9.2",
"vectordb": "^0.4.6",
"vectordb": "^0.4.7",
"weaviate-ts-client": "^2.0.0"
},
"overrides": {
"@pinecone-database/pinecone": "^1.1.2",
"weaviate-ts-client": "^2.0.0",
"hnswlib-node": "^2.1.0",
"vectordb": "^0.4.7"
},
"peerDependenciesMeta": {
"@pinecone-database/pinecone": {
"optional": true
Expand Down Expand Up @@ -131,12 +137,6 @@
"engines": {
"node": ">= 18.0.0"
},
"overrides": {
"@pinecone-database/pinecone": "^1.1.2",
"weaviate-ts-client": "^2.0.0",
"hnswlib-node": "^2.1.0",
"vectordb": "^0.4.6"
},
"exports": {
".": {
"types": "./dist/index.d.ts",
Expand Down
7 changes: 5 additions & 2 deletions src/loaders/confluence-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }> {
private readonly debug = createDebugMessages('embedjs:loader:ConfluenceLoader');

private readonly confluence: ConfluenceClient;
private readonly confluenceBaseUrl: string;
private readonly spaceNames: string[];

constructor({
Expand All @@ -26,8 +27,10 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }> {
super(`ConfluenceLoader_${md5(spaceNames.join(','))}`);

this.spaceNames = spaceNames;
this.confluenceBaseUrl = confluenceBaseUrl ?? process.env.CONFLUENCE_BASE_URL;

this.confluence = new ConfluenceClient({
host: confluenceBaseUrl ?? process.env.CONFLUENCE_BASE_URL,
host: confluenceBaseUrl,
authentication: {
basic: {
username: confluenceUsername ?? process.env.CONFLUENCE_USER_NAME,
Expand Down Expand Up @@ -79,7 +82,7 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }> {
contentHash: result.contentHash,
metadata: {
type: <'ConfluenceLoader'>'ConfluenceLoader',
source: content.title,
source: `${this.confluenceBaseUrl}${content._links.webui}`,
},
};
}
Expand Down

0 comments on commit 1a09703

Please sign in to comment.