Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add include patterns for RAG file filtering #162

Merged
merged 3 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/core/rag/ragEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export class RAGEngine {
{
chunkSize: this.settings.ragOptions.chunkSize,
excludePatterns: this.settings.ragOptions.excludePatterns,
includePatterns: this.settings.ragOptions.includePatterns,
reindexAll: options.reindexAll,
},
(indexProgress) => {
Expand Down
11 changes: 11 additions & 0 deletions src/database/modules/vector/VectorManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export class VectorManager {
options: {
chunkSize: number
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
},
updateProgress?: (indexProgress: IndexProgress) => void,
Expand All @@ -66,6 +67,7 @@ export class VectorManager {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
reindexAll: true,
})
await this.repository.clearAllVectors(embeddingModel)
Expand All @@ -74,6 +76,7 @@ export class VectorManager {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
})
await this.repository.deleteVectorsForMultipleFiles(
filesToIndex.map((file) => file.path),
Expand Down Expand Up @@ -220,10 +223,12 @@ export class VectorManager {
private async getFilesToIndex({
embeddingModel,
excludePatterns,
includePatterns,
reindexAll,
}: {
embeddingModel: EmbeddingModel
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
}): Promise<TFile[]> {
let filesToIndex = this.app.vault.getMarkdownFiles()
Expand All @@ -232,6 +237,12 @@ export class VectorManager {
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
})

if (includePatterns.length > 0) {
filesToIndex = filesToIndex.filter((file) => {
return includePatterns.some((pattern) => minimatch(file.path, pattern))
})
}

if (reindexAll) {
return filesToIndex
}
Expand Down
80 changes: 78 additions & 2 deletions src/settings/SettingTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -459,10 +459,45 @@ export class SmartCopilotSettingTab extends PluginSettingTab {
renderRAGSection(containerEl: HTMLElement): void {
new Setting(containerEl).setHeading().setName('RAG')

new Setting(containerEl)
.setName('Include patterns')
.setDesc(
'If any patterns are specified, ONLY files matching at least one pattern will be included in indexing. One pattern per line. Uses glob patterns (e.g., "notes/*", "*.md"). Leave empty to include all files not excluded by exclude patterns. After changing this, use the command "Rebuild entire vault index" to apply changes.',
)
.addButton((button) =>
button.setButtonText('Test patterns').onClick(async () => {
const patterns = this.plugin.settings.ragOptions.includePatterns
const includedFiles = await findFilesMatchingPatterns(
patterns,
this.plugin.app.vault,
)
new IncludedFilesModal(this.app, includedFiles, patterns).open()
}),
)
new Setting(containerEl)
.setClass('smtcmp-settings-textarea')
.addTextArea((text) =>
text
.setValue(this.plugin.settings.ragOptions.includePatterns.join('\n'))
.onChange(async (value) => {
const patterns = value
.split('\n')
.map((p) => p.trim())
.filter((p) => p.length > 0)
await this.plugin.setSettings({
...this.plugin.settings,
ragOptions: {
...this.plugin.settings.ragOptions,
includePatterns: patterns,
},
})
}),
)

new Setting(containerEl)
.setName('Exclude patterns')
.setDesc(
'Files matching these patterns will be excluded from indexing. One pattern per line. Uses glob patterns (e.g., "private/*", "*.tmp"). After changing this, use the command "Rebuild entire vault index" to apply changes.',
'Files matching ANY of these patterns will be excluded from indexing. One pattern per line. Uses glob patterns (e.g., "private/*", "*.tmp"). Leave empty to exclude nothing. After changing this, use the command "Rebuild entire vault index" to apply changes.',
)
.addButton((button) =>
button.setButtonText('Test patterns').onClick(async () => {
Expand All @@ -474,7 +509,6 @@ export class SmartCopilotSettingTab extends PluginSettingTab {
new ExcludedFilesModal(this.app, excludedFiles).open()
}),
)

new Setting(containerEl)
.setClass('smtcmp-settings-textarea')
.addTextArea((text) =>
Expand Down Expand Up @@ -676,3 +710,45 @@ class ExcludedFilesModal extends Modal {
contentEl.empty()
}
}

/**
 * Modal that previews the result of testing the RAG include patterns.
 *
 * The title always shows the number of files handed to the modal. The body
 * shows one of three things:
 *  - a notice that nothing is filtered, when no include patterns are set;
 *  - a notice that nothing matched, when patterns exist but the file list is empty;
 *  - otherwise, a bullet list with the path of every file.
 */
class IncludedFilesModal extends Modal {
  private files: TFile[]
  private patterns: string[]

  /**
   * @param app - Application instance forwarded to the base modal.
   * @param files - Files to list in the modal.
   * @param patterns - Include patterns the file list was produced from.
   */
  constructor(app: App, files: TFile[], patterns: string[]) {
    super(app)
    this.patterns = patterns
    this.files = files
  }

  /** Clears the content area and renders the title plus a notice or the file list. */
  onOpen() {
    const container = this.contentEl
    container.empty()

    const matchCount = this.files.length
    this.titleEl.setText(`Included Files (${matchCount})`)

    // Work out whether a plain notice should replace the file list.
    // The "no patterns" case takes priority over the "no matches" case.
    let notice: string | null = null
    if (this.patterns.length === 0) {
      notice =
        'No inclusion patterns specified - all files will be included (except those matching exclusion patterns)'
    } else if (matchCount === 0) {
      notice = 'No files match the inclusion patterns'
    }

    if (notice !== null) {
      container.createEl('p', { text: notice })
      return
    }

    // One list item per file, showing its path.
    const listEl = container.createEl('ul')
    for (const entry of this.files) {
      listEl.createEl('li', { text: entry.path })
    }
  }

  /** Empties the content area when the modal is dismissed. */
  onClose() {
    this.contentEl.empty()
  }
}
2 changes: 2 additions & 0 deletions src/types/settings.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ describe('parseSmartCopilotSettings', () => {
minSimilarity: 0.0,
limit: 10,
excludePatterns: [],
includePatterns: [],
},
})
})
Expand Down Expand Up @@ -114,6 +115,7 @@ describe('settings migration', () => {
minSimilarity: 0.0,
limit: 10,
excludePatterns: [],
includePatterns: [],
},
})
})
Expand Down
2 changes: 2 additions & 0 deletions src/types/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const ragOptionsSchema = z.object({
minSimilarity: z.number().catch(0.0),
limit: z.number().catch(10),
excludePatterns: z.array(z.string()).catch([]),
includePatterns: z.array(z.string()).catch([]),
})

const smartCopilotSettingsSchema = z.object({
Expand Down Expand Up @@ -90,6 +91,7 @@ const smartCopilotSettingsSchema = z.object({
minSimilarity: 0.0,
limit: 10,
excludePatterns: [],
includePatterns: [],
}),
})

Expand Down
Loading