Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add author field to issue and pull context #3503

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 31 additions & 10 deletions crates/tabby-common/src/api/structured_doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ pub struct DocSearchWebDocument {
/// Search result payload for an indexed issue document.
pub struct DocSearchIssueDocument {
    /// Issue title.
    pub title: String,
    /// Permalink to the issue.
    pub link: String,
    /// Login/name of the issue author.
    pub author: String,
    /// Issue body text.
    pub body: String,
    /// Whether the issue is closed.
    pub closed: bool,
}
Expand All @@ -70,6 +71,7 @@ pub struct DocSearchIssueDocument {
pub struct DocSearchPullDocument {
pub title: String,
pub link: String,
pub author: String,
zwpaper marked this conversation as resolved.
Show resolved Hide resolved
pub body: String,
pub diff: String,
pub merged: bool,
Expand Down Expand Up @@ -139,6 +141,11 @@ impl FromTantivyDocument for DocSearchIssueDocument {
schema.field_attributes,
structured_doc::fields::issue::LINK,
);
let author = get_json_text_field(
doc,
schema.field_attributes,
structured_doc::fields::issue::AUTHOR,
);
let body = get_json_text_field(
doc,
schema.field_attributes,
Expand All @@ -152,6 +159,7 @@ impl FromTantivyDocument for DocSearchIssueDocument {
Some(Self {
title: title.into(),
link: link.into(),
author: author.into(),
body: body.into(),
closed,
})
Expand All @@ -171,6 +179,11 @@ impl FromTantivyDocument for DocSearchPullDocument {
schema.field_attributes,
structured_doc::fields::pull::LINK,
);
let author = get_json_text_field(
doc,
schema.field_attributes,
structured_doc::fields::pull::AUTHOR,
);
let body = get_json_text_field(
doc,
schema.field_attributes,
Expand All @@ -189,6 +202,7 @@ impl FromTantivyDocument for DocSearchPullDocument {
Some(Self {
title: title.into(),
link: link.into(),
author: author.into(),
body: body.into(),
diff: diff.into(),
merged,
Expand All @@ -200,20 +214,27 @@ fn get_json_field<'a>(
doc: &'a TantivyDocument,
field: schema::Field,
name: &str,
) -> CompactDocValue<'a> {
doc.get_first(field)
.unwrap()
.as_object()
.unwrap()
.find(|(k, _)| *k == name)
.unwrap()
.1
) -> Option<CompactDocValue<'a>> {
Some(
doc.get_first(field)?
.as_object()?
.find(|(k, _)| *k == name)?
.1,
)
}

/// Read `name` from the JSON attributes as a boolean.
///
/// Falls back to `false` when the field is missing or not a boolean.
fn get_json_bool_field(doc: &TantivyDocument, field: schema::Field, name: &str) -> bool {
    get_json_field(doc, field, name)
        .and_then(|value| value.as_bool())
        .unwrap_or(false)
}

/// Read `name` from the JSON attributes as a string slice.
///
/// Falls back to the empty string when the field is missing or not text.
fn get_json_text_field<'a>(doc: &'a TantivyDocument, field: schema::Field, name: &str) -> &'a str {
    get_json_field(doc, field, name)
        .and_then(|value| value.as_str())
        .unwrap_or("")
}
31 changes: 30 additions & 1 deletion crates/tabby-common/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const FIELD_CHUNK_ID: &str = "chunk_id";
const FIELD_UPDATED_AT: &str = "updated_at";
const FIELD_FAILED_CHUNKS_COUNT: &str = "failed_chunks_count";
pub const FIELD_SOURCE_ID: &str = "source_id";
pub const FIELD_ATTRIBUTES: &str = "attributes";

pub mod corpus {
pub const CODE: &str = "code";
Expand Down Expand Up @@ -103,7 +104,7 @@ impl IndexSchema {
let field_updated_at = builder.add_date_field(FIELD_UPDATED_AT, INDEXED | STORED);
let field_failed_chunks_count =
builder.add_u64_field(FIELD_FAILED_CHUNKS_COUNT, INDEXED | FAST | STORED);
let field_attributes = builder.add_text_field("attributes", STORED);
let field_attributes = builder.add_json_field(FIELD_ATTRIBUTES, FAST | STORED);
zwpaper marked this conversation as resolved.
Show resolved Hide resolved

let field_chunk_id = builder.add_text_field(FIELD_CHUNK_ID, STRING | FAST | STORED);
let field_chunk_attributes = builder.add_json_field(
Expand Down Expand Up @@ -228,6 +229,34 @@ impl IndexSchema {
])
}

/// Build a query to check if the document has specific attribute field.
pub fn doc_has_attribute_field(&self, corpus: &str, doc_id: &str, field: &str) -> impl Query {
let doc_id_query = TermQuery::new(
Term::from_field_text(self.field_id, doc_id),
tantivy::schema::IndexRecordOption::Basic,
);

BooleanQuery::new(vec![
// Must match the corpus
(Occur::Must, self.corpus_query(corpus)),
// Must match the doc id
(Occur::Must, Box::new(doc_id_query)),
// Exclude chunk documents
(
Occur::MustNot,
Box::new(ExistsQuery::new_exists_query(FIELD_CHUNK_ID.into())),
),
zwpaper marked this conversation as resolved.
Show resolved Hide resolved
// Must has the failed_chunks_count field
(
Occur::Must,
Box::new(ExistsQuery::new_exists_query(format!(
"{}.{}",
FIELD_ATTRIBUTES, field
))),
),
])
}

/// Build a query to find the document with the given `doc_id`, include chunks.
pub fn doc_query_with_chunks(&self, corpus: &str, doc_id: &str) -> impl Query {
let doc_id_query = TermQuery::new(
Expand Down
2 changes: 2 additions & 0 deletions crates/tabby-common/src/index/structured_doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ pub mod fields {
/// Attribute field names for indexed issue documents.
pub mod issue {
    pub const TITLE: &str = "title";
    pub const LINK: &str = "link";
    pub const AUTHOR: &str = "author";
    pub const BODY: &str = "body";
    pub const CLOSED: &str = "closed";
}

pub mod pull {
pub const TITLE: &str = "title";
pub const LINK: &str = "link";
pub const AUTHOR: &str = "author";
zwpaper marked this conversation as resolved.
Show resolved Hide resolved
pub const BODY: &str = "body";
pub const DIFF: &str = "diff";
pub const MERGED: &str = "merged";
Expand Down
2 changes: 1 addition & 1 deletion crates/tabby-common/src/terminal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ impl<'a> InfoMessage<'a> {
}
}

impl<'a> ToString for InfoMessage<'a> {
impl ToString for InfoMessage<'_> {
fn to_string(&self) -> String {
let mut str = String::new();
str.push_str(&format!(" {}\n\n", self.header_format.format(self.header)));
Expand Down
4 changes: 2 additions & 2 deletions crates/tabby-git/src/grep/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub struct GrepMatchSink<'output, 'a> {
matcher: &'a RegexMatcher,
}

impl<'output, 'a> Sink for GrepMatchSink<'output, 'a> {
impl Sink for GrepMatchSink<'_, '_> {
type Error = std::io::Error;

fn matched(
Expand Down Expand Up @@ -201,7 +201,7 @@ pub struct GrepNegativeMatchSink<'output> {
output: &'output mut GrepOutput,
}

impl<'output> Sink for GrepNegativeMatchSink<'output> {
impl Sink for GrepNegativeMatchSink<'_> {
type Error = std::io::Error;

fn matched(
Expand Down
4 changes: 2 additions & 2 deletions crates/tabby-index-cli/src/timer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub struct OpenTimer<'a> {
depth: u32,
}

impl<'a> OpenTimer<'a> {
impl OpenTimer<'_> {
/// Starts timing a new named subtask
///
/// The timer is stopped automatically
Expand All @@ -23,7 +23,7 @@ impl<'a> OpenTimer<'a> {
}
}

impl<'a> Drop for OpenTimer<'a> {
impl Drop for OpenTimer<'_> {
fn drop(&mut self) {
self.timer_tree.timings.push(Timing {
name: self.name,
Expand Down
18 changes: 15 additions & 3 deletions crates/tabby-index/src/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,10 +265,9 @@ impl Indexer {
!docs.is_empty()
}

/// Get the failed_chunks_count field for a document.
/// tracks the number of embedding indexing failed chunks for a document.
/// Check whether the document has failed chunks.
///
/// return 0 if the field is not found.
/// The failed-chunks count tracks, per document, how many chunks failed embedding indexing.
pub fn has_failed_chunks(&self, id: &str) -> bool {
let schema = IndexSchema::instance();
let query = schema.doc_has_failed_chunks(&self.corpus, id);
Expand All @@ -278,6 +277,19 @@ impl Indexer {

!docs.is_empty()
}

/// Check whether the document identified by `id` has the attribute
/// sub-field `field` in the index.
///
/// Returns `false` when the search fails (the error is logged at debug level).
pub fn has_attribute_field(&self, id: &str, field: &str) -> bool {
    let schema = IndexSchema::instance();
    let query = schema.doc_has_attribute_field(&self.corpus, id, field);
    self.searcher
        .search(&query, &TopDocs::with_limit(1))
        .map(|docs| !docs.is_empty())
        .unwrap_or_else(|e| {
            debug!("query tantivy error: {}", e);
            false
        })
}
}

pub struct IndexGarbageCollector {
Expand Down
67 changes: 52 additions & 15 deletions crates/tabby-index/src/indexer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ mod structured_doc_tests {
use std::sync::Arc;

use serial_test::file_serial;
use tabby_common::index::corpus;
use tabby_common::index::{corpus, structured_doc::fields as StructuredDocIndexFields};
use temp_testdir::TempDir;

use super::mock_embedding::MockEmbedding;
Expand Down Expand Up @@ -63,6 +63,7 @@ mod structured_doc_tests {
fields: StructuredDocFields::Issue(StructuredDocIssueFields {
link: id.to_owned(),
title: "title".to_owned(),
author: "author".to_owned(),
body: "body".to_owned(),
closed: false,
}),
Expand All @@ -86,13 +87,7 @@ mod structured_doc_tests {
indexer.commit();

let validator = Indexer::new(corpus::STRUCTURED_DOC);
// Wait for up to 60s for the document to be indexed.
for _ in 0..10 {
if validator.is_indexed(id) {
break;
}
std::thread::sleep(std::time::Duration::from_secs(1));
}

assert!(validator.is_indexed(id));
assert!(validator.has_failed_chunks(id));

Expand All @@ -115,6 +110,7 @@ mod structured_doc_tests {
fields: StructuredDocFields::Issue(StructuredDocIssueFields {
link: id.to_owned(),
title: "title".to_owned(),
author: "author".to_owned(),
body: "body".to_owned(),
closed: false,
}),
Expand All @@ -138,18 +134,57 @@ mod structured_doc_tests {
indexer.commit();

let validator = Indexer::new(corpus::STRUCTURED_DOC);
// Wait for up to 60s for the document to be indexed.
for _ in 0..10 {
if validator.is_indexed(id) {
break;
}
std::thread::sleep(std::time::Duration::from_secs(1));
}

assert!(validator.is_indexed(id));
assert!(!validator.has_failed_chunks(id));

tabby_common::path::set_tabby_root(root);
}

#[test]
#[file_serial(set_tabby_root)]
fn test_structured_doc_has_attribute_field() {
    // Point the tabby root at a scratch directory for the duration of the test.
    let original_root = tabby_common::path::tabby_root();
    let temp_dir = TempDir::default();
    tabby_common::path::set_tabby_root(temp_dir.to_owned());

    let id = "structured_doc_has_attribute_field";
    let embedding = Arc::new(MockEmbedding::new(vec![1.0], false));
    let indexer = StructuredDocIndexer::new(embedding.clone());
    let doc = StructuredDoc {
        source_id: "source".to_owned(),
        fields: StructuredDocFields::Issue(StructuredDocIssueFields {
            link: id.to_owned(),
            title: "title".to_owned(),
            author: "author".to_owned(),
            body: "body".to_owned(),
            closed: false,
        }),
    };

    let updated_at = chrono::Utc::now();
    let synced = tokio::runtime::Runtime::new().unwrap().block_on(async {
        indexer
            .sync(
                StructuredDocState {
                    updated_at,
                    deleted: false,
                },
                doc,
            )
            .await
    });
    assert!(synced);
    indexer.commit();

    // A fresh reader should see the document along with its author attribute.
    let validator = Indexer::new(corpus::STRUCTURED_DOC);
    assert!(validator.is_indexed(id));
    assert!(validator.has_attribute_field(id, StructuredDocIndexFields::issue::AUTHOR));

    // Restore the original tabby root so subsequent tests are unaffected.
    tabby_common::path::set_tabby_root(original_root);
}
}

mod builder_tests {
Expand Down Expand Up @@ -239,6 +274,7 @@ mod builder_tests {
fields: StructuredDocFields::Issue(StructuredDocIssueFields {
link: test_id.to_owned(),
title: "title".to_owned(),
author: "author".to_owned(),
body: "body".to_owned(),
closed: false,
}),
Expand Down Expand Up @@ -300,6 +336,7 @@ mod builder_tests {
fields: StructuredDocFields::Issue(StructuredDocIssueFields {
link: test_id.to_owned(),
title: "title".to_owned(),
author: "author".to_owned(),
body: "body".to_owned(),
closed: false,
}),
Expand Down
Loading
Loading