
Update lindera to 0.10.0 (#32)
* Update lindera to 0.10.0

* Update CHANGES.md
mosuka authored Feb 25, 2022
1 parent fdf01fc commit d97a542
Showing 3 changed files with 11 additions and 14 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
@@ -2,6 +2,9 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+## 0.10.0 (2022-02-25)
+- Update lindera to 0.10.0 #32 @mosuka
+
 ## 0.9.0 (2022-02-20)
 - Update Lindera to v0.9.0 #30 @mosuka
 - Use RwLock instead of cloning tokenizer #27 @vbkaisetsu
6 changes: 3 additions & 3 deletions Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lindera-tantivy"
-version = "0.9.0"
+version = "0.10.0"
 edition = "2021"
 description = "A Tokenizer for Tantivy, based on Lindera."
 documentation = "https://docs.rs/lindera-tantivy"
@@ -14,8 +14,8 @@ license = "MIT"
 [dependencies]
 tantivy = "0.16"
 
-lindera = "0.9.0"
-lindera-core = "0.9.0"
+lindera = "0.10.0"
+lindera-core = "0.10.0"
 
 [dev-dependencies]
 criterion = "0.3"
16 changes: 5 additions & 11 deletions src/tokenizer.rs
@@ -1,5 +1,3 @@
-use std::sync::RwLock;
-
 use tantivy::tokenizer::{BoxTokenStream, Tokenizer};
 
 use lindera::tokenizer::{Tokenizer as LTokenizer, TokenizerConfig};
@@ -69,38 +67,34 @@ use crate::stream::LinderaTokenStream;
 /// assert!(stream.next().is_none());
 /// ```
 pub struct LinderaTokenizer {
-    pub tokenizer: RwLock<LTokenizer>,
+    pub tokenizer: LTokenizer,
 }
 
 impl Clone for LinderaTokenizer {
     fn clone(&self) -> Self {
         Self {
-            // NOTE: read() returns an error when the lock is poisoned.
-            // That case means the tokenizer panics, so we use unwrap() here.
-            tokenizer: RwLock::new(self.tokenizer.read().unwrap().clone()),
+            tokenizer: self.tokenizer.clone(),
         }
     }
 }
 
 impl LinderaTokenizer {
     pub fn new() -> LinderaResult<LinderaTokenizer> {
         Ok(LinderaTokenizer {
-            tokenizer: RwLock::new(LTokenizer::new()?),
+            tokenizer: LTokenizer::new()?,
         })
     }
 
     pub fn with_config(config: TokenizerConfig) -> LinderaResult<LinderaTokenizer> {
         Ok(LinderaTokenizer {
-            tokenizer: RwLock::new(LTokenizer::with_config(config)?),
+            tokenizer: LTokenizer::with_config(config)?,
         })
     }
 }
 
 impl Tokenizer for LinderaTokenizer {
     fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
-        // NOTE: write() returns an error when the lock is poisoned.
-        // That case means the tokenizer panics, so we use unwrap() here.
-        let result = match self.tokenizer.write().unwrap().tokenize(text) {
+        let result = match self.tokenizer.tokenize(text) {
             Ok(result) => result,
             Err(_err) => Vec::new(),
         };
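Why the RwLock could go: as the hunk above shows, `token_stream` takes `&self` and now calls `self.tokenizer.tokenize(text)` directly, so lindera 0.10.0's `Tokenizer::tokenize` works through a shared reference and the lock (along with the poisoned-lock `unwrap()` comments) is no longer needed; `Clone` becomes a plain field clone. Below is a minimal usage sketch of the simplified tokenizer, assuming the crate path `lindera_tantivy::tokenizer::LinderaTokenizer`, the default dictionary configuration, and tantivy 0.16; the sample text is illustrative only.

```rust
// Sketch only: assumes lindera-tantivy 0.10.0 (this commit) and tantivy 0.16.
// The sample phrase is illustrative; output depends on the dictionary in use.
use tantivy::tokenizer::Tokenizer;

use lindera_tantivy::tokenizer::LinderaTokenizer;

fn main() {
    // Default (system dictionary) configuration; no RwLock wrapper anymore.
    let tokenizer = LinderaTokenizer::new().unwrap();

    // token_stream() borrows the tokenizer immutably in 0.10.0.
    let mut stream = tokenizer.token_stream("関西国際空港限定トートバッグ");
    while let Some(token) = stream.next() {
        println!("{}", token.text);
    }
}
```

Cloning the tokenizer, e.g. for Tantivy's tokenizer registry, is now just a clone of the inner `LTokenizer`, with no locking involved.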
