forked from filodb/FiloDB
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(core): Add open/close support for Tantivy Index
Commit 1 of adding Tantivy index support. This PR is broken down into a small subset of the overall Tantivy logic to allow for easier partial review. The index code is not usable end-to-end until all parts are committed. This adds the basic Rust project skeleton and supports opening, applying schema, and closing the index. Many methods are unimplemented and will be added in follow-up PRs. End-to-end testing is not available in this PR as the index test suite requires ingestion and query support for verification.
- Loading branch information
Showing
19 changed files
with
2,001 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
213 changes: 213 additions & 0 deletions
213
core/src/main/scala/filodb.core/memstore/PartKeyTantivyIndex.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
package filodb.core.memstore | ||
|
||
import java.io.File | ||
import java.nio.file.Files | ||
|
||
import debox.Buffer | ||
import org.apache.commons.lang3.SystemUtils | ||
import org.apache.lucene.util.BytesRef | ||
|
||
import filodb.core.DatasetRef | ||
import filodb.core.metadata.Column.ColumnType.{MapColumn, StringColumn} | ||
import filodb.core.metadata.PartitionSchema | ||
import filodb.core.query.ColumnFilter | ||
|
||
/**
 * Part key index that delegates to a native Tantivy index through the JNI bindings declared in
 * TantivyNativeMethods.
 *
 * Only creation of the native handle, reset(), commit() and closeIndex() are implemented here.
 * Every other member is a `???` placeholder and throws scala.NotImplementedError when invoked.
 *
 * @param ref              dataset reference; forwarded to PartKeyIndexRaw and used in log messages
 * @param schema           partition schema; the names of its string columns, map columns and
 *                         multi-column facets are handed to the native index when it is created
 * @param shardNum         shard number; forwarded to PartKeyIndexRaw and used in log messages
 * @param retentionMillis  only used to calculate fallback startTime
 * @param diskLocation     forwarded to PartKeyIndexRaw
 * @param lifecycleManager forwarded to PartKeyIndexRaw
 */
class PartKeyTantivyIndex(ref: DatasetRef,
                          schema: PartitionSchema,
                          shardNum: Int,
                          retentionMillis: Long, // only used to calculate fallback startTime
                          diskLocation: Option[File] = None,
                          lifecycleManager: Option[IndexMetadataStore] = None
                         ) extends PartKeyIndexRaw(ref, shardNum, schema, diskLocation, lifecycleManager) {

  // Compute field names for native schema code
  private val schemaFields = schema.columns.filter(_.columnType == StringColumn).map(_.name).toArray

  private val schemaMapFields = schema.columns.filter(_.columnType == MapColumn).map(_.name).toArray

  private val schemaMultiColumnFacets = schema.options.multiColumnFacets.keys.toArray

  // Native handle for cross JNI operations. Set back to 0 by closeIndex() once the handle is freed.
  private var indexHandle: Long = loadIndexData(() => TantivyNativeMethods.newIndexHandle(indexDiskLocation.toString,
    schemaFields, schemaMapFields, schemaMultiColumnFacets))

  logger.info(s"Created tantivy index for dataset=$ref shard=$shardNum at $indexDiskLocation")

  /** Forwards to the native `reset` call for this index handle. */
  override def reset(): Unit = {
    TantivyNativeMethods.reset(indexHandle)
  }

  // Not implemented yet
  override def startFlushThread(flushDelayMinSeconds: Int, flushDelayMaxSeconds: Int): Unit = {
    ???
  }

  // Not implemented yet
  override def partIdsEndedBefore(endedBefore: Long): Buffer[Int] = {
    ???
  }

  // Not implemented yet
  override def removePartitionsEndedBefore(endedBefore: Long, returnApproxDeletedCount: Boolean): Int = {
    ???
  }

  // Not implemented yet
  override def removePartKeys(partIds: Buffer[Int]): Unit = {
    ???
  }

  // Not implemented yet
  override def indexRamBytes: Long = {
    ???
  }

  // Not implemented yet
  override def indexNumEntries: Long = {
    ???
  }

  /**
   * Commits the index and frees the native handle.
   *
   * The handle is freed and zeroed even when the final commit throws, so a failed commit cannot
   * leak the native handle. Calling this again after the handle was freed only logs and returns.
   */
  override def closeIndex(): Unit = {
    logger.info(s"Closing index on dataset=$ref shard=$shardNum")

    // A zero handle means this index was already closed; never pass it back to native code
    if (indexHandle != 0) {
      try {
        commit()
      } finally {
        // Release the native handle regardless of the commit outcome
        TantivyNativeMethods.freeIndexHandle(indexHandle)
        indexHandle = 0
      }
    }
  }

  // Not implemented yet
  override def indexNames(limit: Int): Seq[String] = {
    ???
  }

  // Not implemented yet
  override def indexValues(fieldName: String, topK: Int): Seq[TermInfo] = {
    ???
  }

  // Not implemented yet
  override def labelNamesEfficient(colFilters: Seq[ColumnFilter], startTime: Long, endTime: Long): Seq[String] = {
    ???
  }

  // Not implemented yet
  override def labelValuesEfficient(colFilters: Seq[ColumnFilter], startTime: Long, endTime: Long,
                                    colName: String, limit: Int): Seq[String] = {
    ???
  }

  // Not implemented yet
  override def addPartKey(partKeyOnHeapBytes: Array[Byte], partId: Int, startTime: Long, endTime: Long,
                          partKeyBytesRefOffset: Int)(partKeyNumBytes: Int, documentId: String): Unit = {
    ???
  }

  // Not implemented yet
  override def upsertPartKey(partKeyOnHeapBytes: Array[Byte], partId: Int, startTime: Long, endTime: Long,
                             partKeyBytesRefOffset: Int)(partKeyNumBytes: Int, documentId: String): Unit = {
    ???
  }

  // Not implemented yet
  override def partKeyFromPartId(partId: Int): Option[BytesRef] = {
    ???
  }

  // Not implemented yet
  override def startTimeFromPartId(partId: Int): Long = {
    ???
  }

  // Not implemented yet
  override def endTimeFromPartId(partId: Int): Long = {
    ???
  }

  // Not implemented yet
  override def startTimeFromPartIds(partIds: Iterator[Int]): debox.Map[Int, Long] = {
    ???
  }

  /** Forwards to the native `commit` call for this index handle. */
  override def commit(): Unit = {
    TantivyNativeMethods.commit(indexHandle)
  }

  // Not implemented yet
  override def updatePartKeyWithEndTime(partKeyOnHeapBytes: Array[Byte], partId: Int, endTime: Long,
                                        partKeyBytesRefOffset: Int)(partKeyNumBytes: Int, documentId: String): Unit = {
    ???
  }

  // Not implemented yet
  override def refreshReadersBlocking(): Unit = {
    ???
  }

  // Not implemented yet
  override def partIdsFromFilters(columnFilters: Seq[ColumnFilter], startTime: Long, endTime: Long,
                                  limit: Int): Buffer[Int] = {
    ???
  }

  // Not implemented yet
  override def partKeyRecordsFromFilters(columnFilters: Seq[ColumnFilter], startTime: Long, endTime: Long,
                                         limit: Int): Seq[PartKeyLuceneIndexRecord] = {
    ???
  }

  // Not implemented yet
  override def partIdFromPartKeySlow(partKeyBase: Any, partKeyOffset: Long): Option[Int] = {
    ???
  }

  // Not implemented yet
  override def singlePartKeyFromFilters(columnFilters: Seq[ColumnFilter], startTime: Long,
                                        endTime: Long): Option[Array[Byte]] = {
    ???
  }

  // Not implemented yet
  override protected def addIndexedField(key: String, value: String): Unit = {
    ???
  }

  // Not implemented yet
  protected def addIndexedMapField(mapColumn: String, key: String, value: String): Unit = {
    ???
  }

  // Not implemented yet
  protected override def addMultiColumnFacet(key: String, value: String): Unit = {
    ???
  }
}
|
||
// JNI methods
/**
 * JNI entry points for the native index code.
 *
 * Initialising this object copies the platform-specific `filodb_core` library from the classpath
 * resource `/native/<kernel>/<arch>/<lib>` into a fresh temporary directory and loads it with
 * System.load, so the native methods below are bound before first use.
 */
protected object TantivyNativeMethods {
  // Load native library from jar
  private def loadLibrary(): Unit = {
    val lib = System.mapLibraryName("filodb_core")

    val arch = SystemUtils.OS_ARCH
    val kernel = if (SystemUtils.IS_OS_LINUX) {
      "linux"
    } else if (SystemUtils.IS_OS_MAC) {
      "darwin"
    } else if (SystemUtils.IS_OS_WINDOWS) {
      "windows"
    } else {
      sys.error(s"Unhandled platform ${SystemUtils.OS_NAME}")
    }

    val resourcePath: String = "/native/" + kernel + "/" + arch + "/" + lib

    // getResourceAsStream returns null when the resource is absent; fail with a message that names
    // the missing path instead of an opaque None.get
    val resourceStream = Option(TantivyNativeMethods.getClass.getResourceAsStream(resourcePath)).getOrElse {
      sys.error(s"Native library resource $resourcePath not found on classpath " +
        s"(os=${SystemUtils.OS_NAME} arch=$arch)")
    }

    // Files.copy does not close its input stream, so close it here whether or not the copy succeeds.
    // The temp directory is created only after the resource was found so a failed lookup leaves
    // nothing behind on disk.
    val finalPath = try {
      val tempDir = Files.createTempDirectory("filodb-native-")
      val target = tempDir.resolve(lib)
      Files.copy(resourceStream, target)
      target
    } finally {
      resourceStream.close()
    }

    System.load(finalPath.toAbsolutePath.toString)
  }

  loadLibrary()

  // Create a new native index handle for the given disk location and schema field names
  @native
  def newIndexHandle(diskLocation: String, schemaFields: Array[String],
                     schemaMapFields: Array[String], schemaMultiColumnFacets: Array[String]): Long

  // Free memory used by an index handle
  @native
  def freeIndexHandle(handle: Long): Unit

  // Reset index data (delete all docs)
  @native
  def reset(handle: Long): Unit

  // Commit changes to the index
  @native
  def commit(handle: Long): Unit
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.