From 694ca30c7cb3e0bb3cff20914cb6c95d5a446173 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sun, 11 Aug 2024 12:06:02 -0700 Subject: [PATCH] feat(nodejs): add bitmap and label list index types in nodejs (#1532) --- nodejs/__test__/table.test.ts | 20 ++++++++++++++++++++ nodejs/lancedb/indices.ts | 23 +++++++++++++++++++++++ nodejs/lancedb/query.ts | 2 +- nodejs/src/index.rs | 14 ++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index f95bf1216..7cc9556a9 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -31,7 +31,9 @@ import { Float64, Int32, Int64, + List, Schema, + Utf8, makeArrowTable, } from "../lancedb/arrow"; import { @@ -331,6 +333,7 @@ describe("When creating an index", () => { const schema = new Schema([ new Field("id", new Int32(), true), new Field("vec", new FixedSizeList(32, new Field("item", new Float32()))), + new Field("tags", new List(new Field("item", new Utf8(), true))), ]); let tbl: Table; let queryVec: number[]; @@ -346,6 +349,7 @@ describe("When creating an index", () => { vec: Array(32) .fill(1) .map(() => Math.random()), + tags: ["tag1", "tag2", "tag3"], })), { schema, @@ -428,6 +432,22 @@ describe("When creating an index", () => { } }); + test("create a bitmap index", async () => { + await tbl.createIndex("id", { + config: Index.bitmap(), + }); + const indexDir = path.join(tmpDir.name, "test.lance", "_indices"); + expect(fs.readdirSync(indexDir)).toHaveLength(1); + }); + + test("create a label list index", async () => { + await tbl.createIndex("tags", { + config: Index.labelList(), + }); + const indexDir = path.join(tmpDir.name, "test.lance", "_indices"); + expect(fs.readdirSync(indexDir)).toHaveLength(1); + }); + test("should be able to get index stats", async () => { await tbl.createIndex("id"); diff --git a/nodejs/lancedb/indices.ts b/nodejs/lancedb/indices.ts index 37aa3c5dd..8b1ab3e73 100644 --- 
a/nodejs/lancedb/indices.ts +++ b/nodejs/lancedb/indices.ts @@ -176,6 +176,29 @@ export class Index { return new Index(LanceDbIndex.btree()); } + /** + * Create a bitmap index. + * + * A `Bitmap` index stores, for each distinct value in the column, a bitmap covering every row. + * + * This index works best for low-cardinality columns, where the number of unique values + * is small (i.e., fewer than a few hundred). + */ + static bitmap() { + return new Index(LanceDbIndex.bitmap()); + } + + /** + * Create a label list index. + * + * A `LabelList` index is a scalar index that can be used on `List` columns to + * support queries with `array_contains_all` and `array_contains_any` + * using an underlying bitmap index. + */ + static labelList() { + return new Index(LanceDbIndex.labelList()); + } + /** * Create a full text search index * diff --git a/nodejs/lancedb/query.ts b/nodejs/lancedb/query.ts index c96d0c8e6..594e56a15 100644 --- a/nodejs/lancedb/query.ts +++ b/nodejs/lancedb/query.ts @@ -151,7 +151,7 @@ export class QueryBase query: string, options?: Partial, ): this { - let columns = null; + let columns: string[] | null = null; if (options) { if (typeof options.columns === "string") { columns = [options.columns]; diff --git a/nodejs/src/index.rs b/nodejs/src/index.rs index 461b00217..21b212e2c 100644 --- a/nodejs/src/index.rs +++ b/nodejs/src/index.rs @@ -77,6 +77,20 @@ impl Index { } } + #[napi(factory)] + pub fn bitmap() -> Self { + Self { + inner: Mutex::new(Some(LanceDbIndex::Bitmap(Default::default()))), + } + } + + #[napi(factory)] + pub fn label_list() -> Self { + Self { + inner: Mutex::new(Some(LanceDbIndex::LabelList(Default::default()))), + } + } + #[napi(factory)] pub fn fts() -> Self { Self {