Skip to content

Commit

Permalink
Merge branch 'feature/0.2.3.5' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
duydo committed Aug 22, 2016
2 parents 1684587 + 187a75b commit d06fc09
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 17 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-vietnamese</artifactId>
<version>0.2.2</version>
<version>0.2.3.5</version>
<packaging>jar</packaging>
<name>elasticsearch-analysis-vietnamese</name>
<url>https://github.com/duydo/elasticsearch-analysis-vietnamese/</url>
Expand All @@ -24,8 +24,8 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.build.java.version>1.7</project.build.java.version>
<elasticsearch.version>2.2.0</elasticsearch.version>
<lucene.version>5.4.1</lucene.version>
<elasticsearch.version>2.3.5</elasticsearch.version>
<lucene.version>5.5.0</lucene.version>
<tests.jvms>1</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ public class VietnameseTokenizer extends Tokenizer {
// Iterator over the words produced by the Vietnamese segmenter (see tokenize(input) in reset()).
private Iterator<TaggedWord> taggedWords;

// Running character offset into the input stream, advanced as tokens are consumed.
private int offset = 0;
// NOTE(review): this is a rendered diff with the +/- markers stripped. This
// field is the one DELETED by this commit (its reset in reset() and its uses
// in incrementToken()/end() are dropped as well); the post-commit class keeps
// only `offset` and `skippedPositions`.
private int finalOffset = 0;
// Count of words rejected by accept(); folded into the next token's position increment.
private int skippedPositions;


Expand Down Expand Up @@ -101,19 +100,16 @@ public final boolean incrementToken() throws IOException {
// NOTE(review): rendered diff with +/- markers stripped — PRE-commit and
// POST-commit lines are interleaved below. This is not compilable as-is; the
// per-line annotations reconstruct which version each line belongs to, judged
// by the removal of the `finalOffset` field elsewhere in this commit.
clearAttributes();
while (taggedWords.hasNext()) {
final TaggedWord word = taggedWords.next();
// PRE-commit (removed): untrimmed length + manual offset bookkeeping. Note the
// old copyBuffer below copied `length` chars from the TRIMMED text — presumably
// the length-mismatch this commit fixes; TODO confirm against the repo history.
final int length = word.getText().length();
final int currentOffset = offset;
offset += length;
if (accept(word)) {
// PRE-commit (removed): position increment set before the term attributes.
posIncrAtt.setPositionIncrement(skippedPositions + 1);
termAtt.copyBuffer(word.getText().trim().toCharArray(), 0, length);
offsetAtt.setOffset(correctOffset(currentOffset), finalOffset = correctOffset(offset));
// POST-commit (added): copy exactly the trimmed chars, then derive the end
// offset from termAtt.length() and advance `offset` past the token (+1,
// presumably for the separator — TODO confirm).
final char[] chars = word.getText().trim().toCharArray();
termAtt.copyBuffer(chars, 0, chars.length);
typeAtt.setType(word.getRule().getName());
posIncrAtt.setPositionIncrement(skippedPositions + 1);
offsetAtt.setOffset(correctOffset(offset), offset = correctOffset(offset + termAtt.length()));
offset++;
return true;
// PRE-commit (removed): explicit else-branch increment of skippedPositions.
} else {
// When we skip non-word characters, we still increment the position increment
skippedPositions++;
}
// POST-commit (added): same skip counting, now as loop-tail fallthrough after
// the early `return true` above.
++skippedPositions;
}
// No further words: signal end of token stream.
return false;
}
Expand All @@ -132,17 +128,14 @@ private final boolean accept(TaggedWord word) {
// NOTE(review): rendered diff with +/- markers stripped — both versions of the
// final-offset line appear below.
@Override
public final void end() throws IOException {
super.end();
// set final offset
// PRE-commit (removed): relied on the `finalOffset` field this commit deletes.
offsetAtt.setOffset(finalOffset, finalOffset);
// adjust any skipped tokens
// POST-commit (added): the running `offset` now also serves as the final offset.
offsetAtt.setOffset(offset, offset);
// Account for trailing skipped words so consumers see the correct position gap.
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}

// Resets per-stream state and re-runs segmentation on the (re-set) input.
@Override
public void reset() throws IOException {
super.reset();
offset = 0;
// PRE-commit (removed): reset of the `finalOffset` field deleted by this
// commit; the surrounding lines are common to both versions.
finalOffset = 0;
skippedPositions = 0;
tokenize(input);
}
Expand Down

0 comments on commit d06fc09

Please sign in to comment.