diff --git a/README.md b/README.md
index 7019441..b5109fd 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,8 @@ Check this post: [How to build Elasticsearch Vietnamese Analysis Plugin](http://
## Compatible Versions
| Vietnamese Analysis Plugin | Elasticsearch |
| -------------------------- | ------------- |
-| master | 6.5.3 |
+| master | 7.3.1 |
+| 7.3.1 | 7.3.1 |
| 5.6.5 | 5.6.5 |
| 5.4.1 | 5.4.1 |
| 5.3.1 | 5.3.1 |
@@ -47,7 +48,7 @@ Check this post: [How to build Elasticsearch Vietnamese Analysis Plugin](http://
- [JetBrains](https://www.jetbrains.com) has provided a free license for their great tool: [IntelliJ IDEA](https://www.jetbrains.com/idea/)
## License
-
+
This software is licensed under the Apache 2 license, quoted below.
Licensed under the Apache License, Version 2.0 (the "License"); you may not
diff --git a/pom.xml b/pom.xml
index f191389..401f087 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
4.0.0
org.elasticsearch
elasticsearch-analysis-vietnamese
- 7.0.0
+ 7.3.1
jar
elasticsearch-analysis-vietnamese
https://github.com/duydo/elasticsearch-analysis-vietnamese/
@@ -20,7 +20,7 @@
duydo
Duy Do
- http://duydo.me
+ https://duydo.me
@@ -31,7 +31,7 @@
UTF-8
1.8
- 7.0.0
+ 7.3.1
2.7
diff --git a/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java b/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java
index 6661d52..0e593a2 100644
--- a/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java
+++ b/src/main/java/org/apache/lucene/analysis/vi/VietnameseTokenizer.java
@@ -78,7 +78,7 @@ public final boolean incrementToken() throws IOException {
final int length = word.getText().length();
typeAtt.setType(String.format("<%s>", word.getRule().getName().toUpperCase()));
termAtt.copyBuffer(word.getText().toCharArray(), 0, length);
- final int start = inputText.indexOf(word.getText(), i);
+ final int start = inputText.indexOf(word.getText(), offset);
offsetAtt.setOffset(correctOffset(start), offset = correctOffset(start + length));
return true;
}
diff --git a/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisIntegrationTest.java b/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisIntegrationTest.java
index 313919e..88b200f 100644
--- a/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisIntegrationTest.java
+++ b/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisIntegrationTest.java
@@ -2,10 +2,9 @@
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
-import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugin.analysis.vi.AnalysisVietnamesePlugin;
import org.elasticsearch.plugins.Plugin;
@@ -45,7 +44,8 @@ public void testPluginIsLoaded() throws Exception {
}
public void testVietnameseAnalyzer() throws ExecutionException, InterruptedException {
- AnalyzeResponse response = client().admin().indices()
+
+ AnalyzeAction.Response response = client().admin().indices()
.prepareAnalyze("công nghệ thông tin Việt Nam").setAnalyzer("vi_analyzer")
.execute().get();
String[] expected = {"công nghệ thông tin", "việt", "nam"};
@@ -60,7 +60,7 @@ public void testVietnameseAnalyzerInMapping() throws ExecutionException, Interru
createIndex("test");
ensureGreen("test");
final XContentBuilder mapping = jsonBuilder().startObject()
- .startObject("type")
+ .startObject("_doc")
.startObject("properties")
.startObject("foo")
.field("type", "text")
@@ -69,9 +69,9 @@ public void testVietnameseAnalyzerInMapping() throws ExecutionException, Interru
.endObject()
.endObject()
.endObject();
- client().admin().indices().preparePutMapping("test").setType("type").setSource(mapping).get();
+ client().admin().indices().preparePutMapping("test").setType("_doc").setSource(mapping).get();
final XContentBuilder source = jsonBuilder().startObject().field("foo", "công nghệ thông tin Việt Nam").endObject();
- index("test", "type", "1", source);
+ index("test", "_doc", "1", source);
refresh();
SearchResponse response = client().prepareSearch("test").setQuery(
QueryBuilders.matchQuery("foo", "công nghệ thông tin")).execute().actionGet();
diff --git a/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisTest.java b/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisTest.java
index 1f579b9..af07834 100644
--- a/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisTest.java
+++ b/src/test/java/org/elasticsearch/index/analysis/VietnameseAnalysisTest.java
@@ -3,6 +3,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.vi.VietnameseAnalyzer;
import org.apache.lucene.analysis.vi.VietnameseTokenizer;
import org.elasticsearch.Version;
@@ -80,4 +81,38 @@ public TestAnalysis createTestAnalysis() throws IOException {
Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build();
return createTestAnalysis(new Index("test", "_na_"), nodeSettings, settings, new AnalysisVietnamesePlugin());
}
+
+    /**
+     * Verifies that {@code vi_analyzer} emits the expected terms and start offsets
+     * for a sentence that mixes words, digits and punctuation.
+     *
+     * @throws IOException if the token stream fails while being consumed
+     */
+    public void testTokenOffset() throws IOException {
+        TestAnalysis analysis = createTestAnalysis();
+        NamedAnalyzer analyzer = analysis.indexAnalyzers.get("vi_analyzer");
+        assertNotNull(analyzer);
+
+        String[] expected = new String[]{"phụ tùng", "xe", "mazda", "bán", "tải", "7", "chỗ", "ống", "dẫn", "gió", "tới", "két", "làm", "mát", "khí", "nạp", "cao su", "lưu hóa", "mới", "100%", "phục vụ", "bhbd", "ms", "1", "d0", "013246", "a"};
+        int[] expectedOffset = new int[]{0, 9, 12, 18, 22, 31, 33, 38, 42, 46, 50, 54, 58, 62, 66, 70, 75, 82, 91, 95, 101, 109, 115, 118, 119, 121, 127};
+        // The two fixtures are indexed in lockstep below; fail fast if they ever drift apart.
+        assertThat(expectedOffset.length, equalTo(expected.length));
+
+        // try-with-resources closes the stream even when one of the assertions below fails.
+        try (TokenStream ts = analyzer.analyzer().tokenStream("test", "Phụ tùng xe Mazda bán tải dưới 7 chỗ: ống dẫn gió tới két làm mát khí nạp- cao su lưu hóa, mới 100%, phục vụ BHBD. Ms:1D0013246A")) {
+            // Both attributes are obtained through addAttribute so they are looked up the same way.
+            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
+            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
+            ts.reset();
+            for (int i = 0; i < expected.length; i++) {
+                assertThat(ts.incrementToken(), equalTo(true));
+                assertThat(term.toString(), equalTo(expected[i]));
+                // A matcher reports expected and actual values on failure, unlike assertTrue(a == b).
+                assertThat(offset.startOffset(), equalTo(expectedOffset[i]));
+            }
+            assertThat(ts.incrementToken(), equalTo(false));
+            // Signal end-of-stream before the implicit close(), per the TokenStream consumer workflow.
+            ts.end();
+        }
+    }
}