Skip to content

Commit

Permalink
Merge branch 'feature/0.2.3.5' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
duydo committed Aug 22, 2016
2 parents 1684587 + 187a75b commit d06fc09
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 17 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-vietnamese</artifactId>
<version>0.2.2</version>
<version>0.2.3.5</version>
<packaging>jar</packaging>
<name>elasticsearch-analysis-vietnamese</name>
<url>https://github.com/duydo/elasticsearch-analysis-vietnamese/</url>
Expand All @@ -24,8 +24,8 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.build.java.version>1.7</project.build.java.version>
<elasticsearch.version>2.2.0</elasticsearch.version>
<lucene.version>5.4.1</lucene.version>
<elasticsearch.version>2.3.5</elasticsearch.version>
<lucene.version>5.5.0</lucene.version>
<tests.jvms>1</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ public class VietnameseTokenizer extends Tokenizer {
// Iterator over the words produced by the Vietnamese segmenter (see tokenize(input) in reset()).
private Iterator<TaggedWord> taggedWords;

// Running character offset into the input stream, advanced as tokens are consumed.
private int offset = 0;
// NOTE(review): this is a rendered diff with the +/- markers stripped. This
// field is the one DELETED by this commit (its reset in reset() and its uses
// in incrementToken()/end() are dropped as well); the post-commit class keeps
// only `offset` and `skippedPositions`.
private int finalOffset = 0;
// Count of words rejected by accept(); folded into the next token's position increment.
private int skippedPositions;


Expand Down Expand Up @@ -101,19 +100,16 @@ public final boolean incrementToken() throws IOException {
// NOTE(review): rendered diff with +/- markers stripped — PRE-commit and
// POST-commit lines are interleaved below. This is not compilable as-is; the
// per-line annotations reconstruct which version each line belongs to, judged
// by the removal of the `finalOffset` field elsewhere in this commit.
clearAttributes();
while (taggedWords.hasNext()) {
final TaggedWord word = taggedWords.next();
// PRE-commit (removed): untrimmed length + manual offset bookkeeping. Note the
// old copyBuffer below copied `length` chars from the TRIMMED text — presumably
// the length-mismatch this commit fixes; TODO confirm against the repo history.
final int length = word.getText().length();
final int currentOffset = offset;
offset += length;
if (accept(word)) {
// PRE-commit (removed): position increment set before the term attributes.
posIncrAtt.setPositionIncrement(skippedPositions + 1);
termAtt.copyBuffer(word.getText().trim().toCharArray(), 0, length);
offsetAtt.setOffset(correctOffset(currentOffset), finalOffset = correctOffset(offset));
// POST-commit (added): copy exactly the trimmed chars, then derive the end
// offset from termAtt.length() and advance `offset` past the token (+1,
// presumably for the separator — TODO confirm).
final char[] chars = word.getText().trim().toCharArray();
termAtt.copyBuffer(chars, 0, chars.length);
typeAtt.setType(word.getRule().getName());
posIncrAtt.setPositionIncrement(skippedPositions + 1);
offsetAtt.setOffset(correctOffset(offset), offset = correctOffset(offset + termAtt.length()));
offset++;
return true;
// PRE-commit (removed): explicit else-branch increment of skippedPositions.
} else {
// When we skip non-word characters, we still increment the position increment
skippedPositions++;
}
// POST-commit (added): same skip counting, now as loop-tail fallthrough after
// the early `return true` above.
++skippedPositions;
}
// No further words: signal end of token stream.
return false;
}
Expand All @@ -132,17 +128,14 @@ private final boolean accept(TaggedWord word) {
// NOTE(review): rendered diff with +/- markers stripped — both versions of the
// final-offset line appear below.
@Override
public final void end() throws IOException {
super.end();
// set final offset
// PRE-commit (removed): relied on the `finalOffset` field this commit deletes.
offsetAtt.setOffset(finalOffset, finalOffset);
// adjust any skipped tokens
// POST-commit (added): the running `offset` now also serves as the final offset.
offsetAtt.setOffset(offset, offset);
// Account for trailing skipped words so consumers see the correct position gap.
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}

// Resets per-stream state and re-runs segmentation on the (re-set) input.
@Override
public void reset() throws IOException {
super.reset();
offset = 0;
// PRE-commit (removed): reset of the `finalOffset` field deleted by this
// commit; the surrounding lines are common to both versions.
finalOffset = 0;
skippedPositions = 0;
tokenize(input);
}
Expand Down

0 comments on commit d06fc09

Please sign in to comment.