Skip to content

Commit

Permalink
Merge pull request #157 from llm-tools/monorepo
Browse files Browse the repository at this point in the history
Allow dynamic loaders on markdown local
  • Loading branch information
adhityan authored Nov 5, 2024
2 parents df0bfa7 + a4765db commit 7732a52
Show file tree
Hide file tree
Showing 5 changed files with 557 additions and 12 deletions.
10 changes: 8 additions & 2 deletions core/embedjs/src/loaders/local-path-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,16 @@ export class LocalPathLoader extends BaseLoader<{ type: 'LocalPathLoader' }> {

if (!isDir) {
const stream = fs.createReadStream(currentPath);
const { mime } = await getMimeType(stream);
this.debug(`File '${this.path}' has mime type '${mime}'`);
let { mime } = await getMimeType(stream);
stream.destroy();

this.debug(`File '${this.path}' has mime type '${mime}'`);
// When the detected type is the generic 'application/octet-stream', fall back to the
// file extension so that markdown files (.md / .mdx) are routed as 'text/markdown'.
if (mime === 'application/octet-stream') {
    // `pop()` is typed `string | undefined`; optional chaining keeps this strict-null safe.
    const extension = currentPath.split('.').pop()?.toLowerCase();
    if (extension === 'md' || extension === 'mdx') {
        mime = 'text/markdown';
        // Log only when the override is actually applied, so the debug output stays truthful
        // for octet-stream files that are not markdown.
        this.debug(`File '${this.path}' mime type updated to 'text/markdown'`);
    }
}

try {
const loader = await createLoaderFromMimeType(currentPath, mime);
for await (const result of await loader.getUnfilteredChunks()) {
Expand Down
11 changes: 11 additions & 0 deletions core/embedjs/src/util/mime.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import mime from 'mime';
import createDebugMessages from 'debug';

import { BaseLoader } from '@llm-tools/embedjs-interfaces';
import { TextLoader } from '../loaders/text-loader.js';

Expand Down Expand Up @@ -88,6 +89,16 @@ export async function createLoaderFromMimeType(loaderData: string, mimeType: str
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported XmlLoader');
return new XmlLoader({ filePathOrUrl: loaderData });
}
case 'text/x-markdown':
case 'text/markdown': {
const { MarkdownLoader } = await import('@llm-tools/embedjs-loader-markdown').catch(() => {
throw new Error(
'Package `@llm-tools/embedjs-loader-markdown` needs to be installed to load markdown files',
);
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported MarkdownLoader');
return new MarkdownLoader({ filePathOrUrl: loaderData });
}
case undefined:
throw new Error(`MIME type could not be detected. Please file an issue if you think this is a bug.`);
default:
Expand Down
2 changes: 1 addition & 1 deletion docs/mint.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
"pages": [
"components/data-sources/pdf",
"components/data-sources/csv",
"components/data-sources/xml",
"components/data-sources/json",
"components/data-sources/text",
"components/data-sources/web-page",
Expand All @@ -83,6 +82,7 @@
"components/data-sources/excel",
"components/data-sources/sitemap",
"components/data-sources/markdown",
"components/data-sources/xml",
"components/data-sources/directory",
"components/data-sources/custom"
]
Expand Down
5 changes: 2 additions & 3 deletions examples/markdown/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import 'dotenv/config';
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
import { LocalPathLoader, RAGApplicationBuilder } from '@llm-tools/embedjs';
import { OpenAi, OpenAiEmbeddings } from '@llm-tools/embedjs-openai';
import { MarkdownLoader } from '@llm-tools/embedjs-loader-markdown';
import { HNSWDb } from '@llm-tools/embedjs-hnswlib';

const llmApplication = await new RAGApplicationBuilder()
Expand All @@ -10,5 +9,5 @@ const llmApplication = await new RAGApplicationBuilder()
.setVectorDatabase(new HNSWDb())
.build();

await llmApplication.addLoader(new MarkdownLoader({ filePathOrUrl: './docs/get-started/quickstart.mdx' }));
await llmApplication.addLoader(new LocalPathLoader({ path: './docs' }));
console.log(await llmApplication.query('How do you create an embedJs application?'));
Loading

0 comments on commit 7732a52

Please sign in to comment.