Skip to content

Commit

Permalink
process html to markdown and include in prompt
Browse files Browse the repository at this point in the history
  • Loading branch information
glowingjade committed Oct 24, 2024
1 parent da17bc2 commit c5ff133
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 2 deletions.
7 changes: 7 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"@types/react": "^18.3.10",
"@types/react-dom": "^18.3.0",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/turndown": "^5.0.5",
"@types/uuid": "^10.0.0",
"@typescript-eslint/eslint-plugin": "5.29.0",
"@typescript-eslint/parser": "5.29.0",
Expand Down
56 changes: 54 additions & 2 deletions src/utils/promptGenerator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { App, TFile } from 'obsidian'
import { App, TFile, requestUrl } from 'obsidian'
import TurndownService from 'turndown'

import { editorStateToPlainText } from '../components/chat-view/chat-input/utils/editor-state-to-plain-text'
import { QueryProgressState } from '../components/chat-view/QueryProgress'
Expand All @@ -8,6 +9,7 @@ import {
MentionableBlock,
MentionableFile,
MentionableFolder,
MentionableUrl,
MentionableVault,
} from '../types/mentionable'
import { SmartCopilotSettings } from '../types/settings'
Expand Down Expand Up @@ -217,8 +219,29 @@ ${results
})
.join('')

const urls = message.mentionables.filter(
(m): m is MentionableUrl => m.type === 'url',
)

const urlPrompt =
urls.length > 0
? `## Potentially Relevant Websearch Results
${(
await Promise.all(
urls.map(
async ({ url }) => `\`\`\`
Website URL: ${url}
Website Content:
${await this.getWebsiteContent(url)}
\`\`\``,
),
)
).join('\n')}
`
: ''

return {
promptContent: `${filePrompt}${blockPrompt}\n\n${query}\n\n`,
promptContent: `${filePrompt}${blockPrompt}${urlPrompt}\n\n${query}\n\n`,
shouldUseRAG,
}
}
Expand Down Expand Up @@ -326,4 +349,33 @@ When writing out new markdown blocks, remember not to include "line_number|" at
})
return linesWithNumbers.join('\n')
}

/**
 * Downloads the page at `url` with Obsidian's `requestUrl` and converts the
 * response body (`response.text`) to markdown using Turndown.
 *
 * Conversion tweaks applied on top of Turndown's defaults:
 * - `<a>` elements whose text content is blank and that contain no `<img>`
 *   are replaced with an empty string.
 * - `<script>` and `<style>` elements are removed.
 *
 * Errors from `requestUrl` or from the conversion are not caught here; they
 * reject the returned promise.
 *
 * TODO: Improve markdown conversion logic
 * - filter visually hidden elements
 * ...
 *
 * @param url - Address of the page to fetch.
 * @returns The page content converted to markdown.
 */
private async getWebsiteContent(url: string): Promise<string> {
  const { text: html } = await requestUrl({ url })

  const converter = new TurndownService()
    .addRule('ignoreEmptyLinks', {
      filter: (element) => {
        // Only anchors are candidates for this rule.
        if (element.nodeName !== 'A') {
          return false
        }
        // An anchor with visible text is kept (a missing textContent also
        // falls through here, matching the strict comparison against '').
        if (element.textContent?.trim() !== '') {
          return false
        }
        // Image-only links carry no text but are still meaningful.
        return element.querySelector('img') === null
      },
      replacement: () => '',
    })
    .remove('script')
    .remove('style')

  return converter.turndown(html)
}
}

0 comments on commit c5ff133

Please sign in to comment.