diff --git a/README.md b/README.md index 8f879db..73ac484 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,10 @@ Vietnamese Analysis Plugin for Elasticsearch Vietnamese Analysis plugin integrates Vietnamese language analysis into Elasticsearch. -In order to install the plugin, simply run: +In order to install the plugin, choose a version in [releases](https://github.com/duydo/elasticsearch-analysis-vietnamese/releases) page then run: ```sh -bin/plugin --url https://dl.dropboxusercontent.com/u/1598491/elasticsearch-analysis-vietnamese-0.2.2.zip --install analysis-vietnamese +bin/plugin install link-to-binary-version ``` Or to build from source, you need to build it with Maven: @@ -20,7 +20,13 @@ bin/plugin install file:target/releases/elasticsearch-analysis-vietnamese-0.2.2. |Vietnamese Analysis Plugin|Elasticsearch| |---|---| -| master|2.2.0| +| master|2.3.5| +| 2.3.5|2.3.5| +| 2.3.4|2.3.4| +| 2.3.3|2.3.3| +| 2.3.2|2.3.2| +| 2.3.1|2.3.1| +| 2.3.0|2.3.0| | 0.2.2|2.2.0| | 0.2.1.1|2.1.1| | 0.2.1|2.1.0| diff --git a/pom.xml b/pom.xml index 4a08b7d..e3a2623 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-vietnamese - 0.2.3.5 + 2.3.5 jar elasticsearch-analysis-vietnamese https://github.com/duydo/elasticsearch-analysis-vietnamese/ diff --git a/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java b/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java index 401827a..4317191 100644 --- a/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java +++ b/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java @@ -101,15 +101,15 @@ public final boolean incrementToken() throws IOException { while (taggedWords.hasNext()) { final TaggedWord word = taggedWords.next(); if (accept(word)) { - final char[] chars = word.getText().trim().toCharArray(); - termAtt.copyBuffer(chars, 0, chars.length); - typeAtt.setType(word.getRule().getName()); posIncrAtt.setPositionIncrement(skippedPositions + 1); - offsetAtt.setOffset(correctOffset(offset), offset = correctOffset(offset + termAtt.length())); + typeAtt.setType(word.getRule().getName()); + final int length = word.getText().length(); + termAtt.copyBuffer(word.getText().toCharArray(), 0, length); + offsetAtt.setOffset(correctOffset(offset), offset = correctOffset(offset + length)); offset++; return true; } - ++skippedPositions; + skippedPositions++; } return false; } @@ -128,7 +128,8 @@ private final boolean accept(TaggedWord word) { @Override public final void end() throws IOException { super.end(); - offsetAtt.setOffset(offset, offset); + final int finalOffset = correctOffset(offset); + offsetAtt.setOffset(finalOffset, finalOffset); posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); }