Skip to content

Commit

Permalink
feat: add include patterns for RAG file filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
kevin-on committed Dec 12, 2024
1 parent 9ec22fc commit 06a2144
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/core/rag/ragEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export class RAGEngine {
{
chunkSize: this.settings.ragOptions.chunkSize,
excludePatterns: this.settings.ragOptions.excludePatterns,
includePatterns: this.settings.ragOptions.includePatterns,
reindexAll: options.reindexAll,
},
(indexProgress) => {
Expand Down
11 changes: 11 additions & 0 deletions src/database/modules/vector/VectorManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export class VectorManager {
options: {
chunkSize: number
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
},
updateProgress?: (indexProgress: IndexProgress) => void,
Expand All @@ -66,6 +67,7 @@ export class VectorManager {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
reindexAll: true,
})
await this.repository.clearAllVectors(embeddingModel)
Expand All @@ -74,6 +76,7 @@ export class VectorManager {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
})
await this.repository.deleteVectorsForMultipleFiles(
filesToIndex.map((file) => file.path),
Expand Down Expand Up @@ -220,10 +223,12 @@ export class VectorManager {
private async getFilesToIndex({
embeddingModel,
excludePatterns,
includePatterns,
reindexAll,
}: {
embeddingModel: EmbeddingModel
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
}): Promise<TFile[]> {
let filesToIndex = this.app.vault.getMarkdownFiles()
Expand All @@ -232,6 +237,12 @@ export class VectorManager {
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
})

if (includePatterns.length > 0) {
filesToIndex = filesToIndex.filter((file) => {
return includePatterns.some((pattern) => minimatch(file.path, pattern))
})
}

if (reindexAll) {
return filesToIndex
}
Expand Down
80 changes: 79 additions & 1 deletion src/settings/SettingTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ export class SmartCopilotSettingTab extends PluginSettingTab {
new Setting(containerEl)
.setName('Exclude patterns')
.setDesc(
'Files matching these patterns will be excluded from indexing. One pattern per line. Uses glob patterns (e.g., "private/*", "*.tmp"). After changing this, use the command "Rebuild entire vault index" to apply changes.',
'Files matching ANY of these patterns will be excluded from indexing. One pattern per line. Uses glob patterns (e.g., "private/*", "*.tmp"). Leave empty to exclude nothing. After changing this, use the command "Rebuild entire vault index" to apply changes.',
)
.addButton((button) =>
button.setButtonText('Test patterns').onClick(async () => {
Expand Down Expand Up @@ -495,6 +495,42 @@ export class SmartCopilotSettingTab extends PluginSettingTab {
}),
)

new Setting(containerEl)
.setName('Include patterns')
.setDesc(
'If any patterns are specified, ONLY files matching at least one pattern will be included in indexing. One pattern per line. Uses glob patterns (e.g., "notes/*", "*.md"). Leave empty to include all files not excluded by exclude patterns. After changing this, use the command "Rebuild entire vault index" to apply changes.',
)
.addButton((button) =>
button.setButtonText('Test patterns').onClick(async () => {
const patterns = this.plugin.settings.ragOptions.includePatterns
const includedFiles = await findFilesMatchingPatterns(
patterns,
this.plugin.app.vault,
)
new IncludedFilesModal(this.app, includedFiles, patterns).open()
}),
)

new Setting(containerEl)
.setClass('smtcmp-settings-textarea')
.addTextArea((text) =>
text
.setValue(this.plugin.settings.ragOptions.includePatterns.join('\n'))
.onChange(async (value) => {
const patterns = value
.split('\n')
.map((p) => p.trim())
.filter((p) => p.length > 0)
await this.plugin.setSettings({
...this.plugin.settings,
ragOptions: {
...this.plugin.settings.ragOptions,
includePatterns: patterns,
},
})
}),
)

new Setting(containerEl)
.setName('Chunk size')
.setDesc(
Expand Down Expand Up @@ -676,3 +712,45 @@ class ExcludedFilesModal extends Modal {
contentEl.empty()
}
}

/**
 * Modal that previews the result of testing the include patterns.
 *
 * The title always shows the number of files it was given. The body shows
 * one of: a notice that no patterns are configured, a notice that nothing
 * matched, or a bullet list with the path of every file it was given.
 */
class IncludedFilesModal extends Modal {
  private files: TFile[]
  private patterns: string[]

  /**
   * @param app - App instance forwarded to the base Modal constructor.
   * @param files - Files to list in the modal body.
   * @param patterns - Include patterns that produced `files`; an empty array
   *   switches the body to the "no patterns specified" notice.
   */
  constructor(app: App, files: TFile[], patterns: string[]) {
    super(app)
    this.patterns = patterns
    this.files = files
  }

  /** Renders the title and either an explanatory notice or the file list. */
  onOpen() {
    const body = this.contentEl
    body.empty()

    this.titleEl.setText(`Included Files (${this.files.length})`)

    // Decide up front whether a notice replaces the list. The "no patterns"
    // case is checked first, so it wins even when `files` is also empty.
    let notice: string | null = null
    if (this.patterns.length === 0) {
      notice =
        'No inclusion patterns specified - all files will be included (except those matching exclusion patterns)'
    } else if (this.files.length === 0) {
      notice = 'No files match the inclusion patterns'
    }

    if (notice !== null) {
      body.createEl('p', { text: notice })
      return
    }

    const listEl = body.createEl('ul')
    for (const entry of this.files) {
      listEl.createEl('li', { text: entry.path })
    }
  }

  /** Clears the modal body when the modal is closed. */
  onClose() {
    this.contentEl.empty()
  }
}
2 changes: 2 additions & 0 deletions src/types/settings.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ describe('parseSmartCopilotSettings', () => {
minSimilarity: 0.0,
limit: 10,
excludePatterns: [],
includePatterns: [],
},
})
})
Expand Down Expand Up @@ -114,6 +115,7 @@ describe('settings migration', () => {
minSimilarity: 0.0,
limit: 10,
excludePatterns: [],
includePatterns: [],
},
})
})
Expand Down
2 changes: 2 additions & 0 deletions src/types/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const ragOptionsSchema = z.object({
minSimilarity: z.number().catch(0.0),
limit: z.number().catch(10),
excludePatterns: z.array(z.string()).catch([]),
includePatterns: z.array(z.string()).catch([]),
})

const smartCopilotSettingsSchema = z.object({
Expand Down Expand Up @@ -90,6 +91,7 @@ const smartCopilotSettingsSchema = z.object({
minSimilarity: 0.0,
limit: 10,
excludePatterns: [],
includePatterns: [],
}),
})

Expand Down

0 comments on commit 06a2144

Please sign in to comment.