diff --git a/crates/tabby-scheduler/src/code/cache.rs b/crates/tabby-scheduler/src/code/cache.rs index 32fa9af8ad64..ba3512050d1f 100644 --- a/crates/tabby-scheduler/src/code/cache.rs +++ b/crates/tabby-scheduler/src/code/cache.rs @@ -19,7 +19,7 @@ fn get_git_hash(path: &Path) -> Result<String> { } #[derive(Deserialize, Serialize, Debug)] -pub struct SourceFileKey { +struct SourceFileKey { path: PathBuf, language: String, git_hash: String, diff --git a/crates/tabby-scheduler/src/code/index.rs b/crates/tabby-scheduler/src/code/index.rs index 1f51db980bfd..8e4bb840eb0a 100644 --- a/crates/tabby-scheduler/src/code/index.rs +++ b/crates/tabby-scheduler/src/code/index.rs @@ -3,7 +3,7 @@ use kv::Batch; use tabby_common::config::RepositoryConfig; use tracing::warn; -use super::{cache::CacheStore, create_code_index, intelligence::SourceCode}; +use super::{cache::CacheStore, create_code_index, intelligence::SourceCode, KeyedSourceCode}; use crate::Indexer; // Magic numbers @@ -25,7 +25,7 @@ pub fn garbage_collection(cache: &mut CacheStore) { async fn add_changed_documents( cache: &mut CacheStore, repository: &RepositoryConfig, - index: &Indexer<SourceCode>, + index: &Indexer<KeyedSourceCode>, ) { let mut indexed_files_batch = Batch::new(); for file in Walk::new(repository.dir()) { @@ -36,20 +36,25 @@ async fn add_changed_documents( continue; } }; - let Some(source_file) = cache.get_source_file(repository, file.path()) else { + let Some(code) = cache.get_source_file(repository, file.path()) else { continue; }; - if !is_valid_file(&source_file) { + if !is_valid_file(&code) { continue; } - let (file_id, indexed) = cache.check_indexed(file.path()); + let (key, indexed) = cache.check_indexed(file.path()); if indexed { continue; } - index.add(source_file).await; + index + .add(KeyedSourceCode { + key: key.clone(), + code, + }) + .await; indexed_files_batch - .set(&file_id, &String::new()) + .set(&key, &String::new()) .expect("Failed to mark file as indexed"); } @@ -57,7 +62,7 @@ async fn add_changed_documents( cache.apply_indexed(indexed_files_batch); } -fn remove_staled_documents(cache: &mut CacheStore, index: &Indexer<SourceCode>) { +fn remove_staled_documents(cache: &mut CacheStore, index: &Indexer<KeyedSourceCode>) { // Create a new writer to commit deletion of removed indexed files let gc_commit = cache.prepare_garbage_collection_for_indexed_files(|key| { index.delete(key); diff --git a/crates/tabby-scheduler/src/code/mod.rs b/crates/tabby-scheduler/src/code/mod.rs index c42b3272a989..e07f8f44f20d 100644 --- a/crates/tabby-scheduler/src/code/mod.rs +++ b/crates/tabby-scheduler/src/code/mod.rs @@ -5,8 +5,8 @@ use serde_json::json; use tabby_common::{config::RepositoryConfig, index::code}; use tracing::{info, warn}; -use self::{cache::SourceFileKey, intelligence::SourceCode}; -use crate::{code::intelligence::CodeIntelligence, Indexer, IndexAttributeBuilder}; +use self::intelligence::SourceCode; +use crate::{code::intelligence::CodeIntelligence, IndexAttributeBuilder, Indexer}; /// Module for creating code search index. mod cache; @@ -41,51 +41,53 @@ impl CodeIndex { } } +struct KeyedSourceCode { + key: String, + code: SourceCode, +} + struct CodeBuilder; #[async_trait] -impl IndexAttributeBuilder<SourceCode> for CodeBuilder { +impl IndexAttributeBuilder<KeyedSourceCode> for CodeBuilder { fn format_id(&self, id: &str) -> String { format!("code:{}", id) } - async fn build_id(&self, source_code: &SourceCode) -> String { - let path = source_code.absolute_path(); - let id = SourceFileKey::try_from(path.as_path()) - .expect("Failed to build ID from path") - .to_string(); - self.format_id(&id) + async fn build_id(&self, source_code: &KeyedSourceCode) -> String { + self.format_id(&source_code.key) } - async fn build_attributes(&self, _source_code: &SourceCode) -> serde_json::Value { + async fn build_attributes(&self, _source_code: &KeyedSourceCode) -> serde_json::Value { json!({}) } async fn build_chunk_attributes( &self, - source_file: &SourceCode, + source_code: &KeyedSourceCode, ) -> BoxStream<(Vec<String>, serde_json::Value)> { - let text = match source_file.read_content() { + let source_code = &source_code.code; + let text = match source_code.read_content() { Ok(content) => content, Err(e) => { warn!( "Failed to read content of '{}': {}", - source_file.filepath, e + source_code.filepath, e ); return Box::pin(futures::stream::empty()); } }; - let source_file = source_file.clone(); + let source_code = source_code.clone(); let s = stream! { let intelligence = CodeIntelligence::default(); for (start_line, body) in intelligence.chunks(&text) { let tokens = code::tokenize_code(body); yield (tokens, json!({ - code::fields::CHUNK_FILEPATH: source_file.filepath, - code::fields::CHUNK_GIT_URL: source_file.git_url, - code::fields::CHUNK_LANGUAGE: source_file.language, + code::fields::CHUNK_FILEPATH: source_code.filepath, + code::fields::CHUNK_GIT_URL: source_code.git_url, + code::fields::CHUNK_LANGUAGE: source_code.language, code::fields::CHUNK_BODY: body, code::fields::CHUNK_START_LINE: start_line, })); @@ -96,7 +98,7 @@ impl IndexAttributeBuilder<SourceCode> for CodeBuilder { } } -fn create_code_index() -> Indexer<SourceCode> { +fn create_code_index() -> Indexer<KeyedSourceCode> { let builder = CodeBuilder; Indexer::new(builder) } diff --git a/crates/tabby-scheduler/src/doc/mod.rs b/crates/tabby-scheduler/src/doc/mod.rs index 3caea66699ed..04f1545c1d5d 100644 --- a/crates/tabby-scheduler/src/doc/mod.rs +++ b/crates/tabby-scheduler/src/doc/mod.rs @@ -10,7 +10,7 @@ use tantivy::doc; use text_splitter::TextSplitter; use tracing::warn; -use crate::{Indexer, IndexAttributeBuilder}; +use crate::{IndexAttributeBuilder, Indexer}; pub struct SourceDocument { pub id: String, diff --git a/crates/tabby-scheduler/src/lib.rs b/crates/tabby-scheduler/src/lib.rs index 624d1b5e1cfe..10cf83830313 100644 --- a/crates/tabby-scheduler/src/lib.rs +++ b/crates/tabby-scheduler/src/lib.rs @@ -10,7 +10,7 @@ pub use code::CodeIndex; use crawl::crawl_pipeline; use doc::SourceDocument; use futures::StreamExt; -use index::{Indexer, IndexAttributeBuilder}; +use index::{IndexAttributeBuilder, Indexer}; mod doc; use std::{env, sync::Arc};