Skip to content

Commit

Permalink
Merge branch 'release/v7.3.1'
Browse files: browse the repository at this point in the history
  • Loading branch information
duydo committed Aug 26, 2019
2 parents 0c59307 + 4077d84 commit 47dca62
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 12 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ Check this post: [How to build Elasticsearch Vietnamese Analysis Plugin](http://
## Compatible Versions
| Vietnamese Analysis Plugin | Elasticsearch |
| -------------------------- | ------------- |
| master | 6.5.3 |
| master | 7.3.1 |
| 7.3.1 | 7.3.1 |
| 5.6.5 | 5.6.5 |
| 5.4.1 | 5.4.1 |
| 5.3.1 | 5.3.1 |
Expand Down Expand Up @@ -47,7 +48,7 @@ Check this post: [How to build Elasticsearch Vietnamese Analysis Plugin](http://
- [JetBrains](https://www.jetbrains.com) has provided a free license for their great tool: [IntelliJ IDEA](https://www.jetbrains.com/idea/)

## License

This software is licensed under the Apache 2 license, quoted below.

Licensed under the Apache License, Version 2.0 (the "License"); you may not
Expand Down
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-vietnamese</artifactId>
<version>7.0.0</version>
<version>7.3.1</version>
<packaging>jar</packaging>
<name>elasticsearch-analysis-vietnamese</name>
<url>https://github.com/duydo/elasticsearch-analysis-vietnamese/</url>
Expand All @@ -20,7 +20,7 @@
<developer>
<id>duydo</id>
<name>Duy Do</name>
<url>http://duydo.me</url>
<url>https://duydo.me</url>
</developer>
</developers>
<scm>
Expand All @@ -31,7 +31,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.build.java.version>1.8</project.build.java.version>
<elasticsearch.version>7.0.0</elasticsearch.version>
<elasticsearch.version>7.3.1</elasticsearch.version>
<log4j.version>2.7</log4j.version>
</properties>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public final boolean incrementToken() throws IOException {
final int length = word.getText().length();
typeAtt.setType(String.format("<%s>", word.getRule().getName().toUpperCase()));
termAtt.copyBuffer(word.getText().toCharArray(), 0, length);
final int start = inputText.indexOf(word.getText(), i);
final int start = inputText.indexOf(word.getText(), offset);
offsetAtt.setOffset(correctOffset(start), offset = correctOffset(start + length));
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugin.analysis.vi.AnalysisVietnamesePlugin;
import org.elasticsearch.plugins.Plugin;
Expand Down Expand Up @@ -45,7 +44,8 @@ public void testPluginIsLoaded() throws Exception {
}

public void testVietnameseAnalyzer() throws ExecutionException, InterruptedException {
AnalyzeResponse response = client().admin().indices()

AnalyzeAction.Response response = client().admin().indices()
.prepareAnalyze("công nghệ thông tin Việt Nam").setAnalyzer("vi_analyzer")
.execute().get();
String[] expected = {"công nghệ thông tin", "việt", "nam"};
Expand All @@ -60,7 +60,7 @@ public void testVietnameseAnalyzerInMapping() throws ExecutionException, Interru
createIndex("test");
ensureGreen("test");
final XContentBuilder mapping = jsonBuilder().startObject()
.startObject("type")
.startObject("_doc")
.startObject("properties")
.startObject("foo")
.field("type", "text")
Expand All @@ -69,9 +69,9 @@ public void testVietnameseAnalyzerInMapping() throws ExecutionException, Interru
.endObject()
.endObject()
.endObject();
client().admin().indices().preparePutMapping("test").setType("type").setSource(mapping).get();
client().admin().indices().preparePutMapping("test").setType("_doc").setSource(mapping).get();
final XContentBuilder source = jsonBuilder().startObject().field("foo", "công nghệ thông tin Việt Nam").endObject();
index("test", "type", "1", source);
index("test", "_doc", "1", source);
refresh();
SearchResponse response = client().prepareSearch("test").setQuery(
QueryBuilders.matchQuery("foo", "công nghệ thông tin")).execute().actionGet();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.vi.VietnameseAnalyzer;
import org.apache.lucene.analysis.vi.VietnameseTokenizer;
import org.elasticsearch.Version;
Expand Down Expand Up @@ -80,4 +81,25 @@ public TestAnalysis createTestAnalysis() throws IOException {
Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build();
return createTestAnalysis(new Index("test", "_na_"), nodeSettings, settings, new AnalysisVietnamesePlugin());
}

/**
 * Checks that the {@code vi_analyzer} produces the expected terms, and the expected start
 * offset for each term, on a sentence mixing words, digits and punctuation.
 *
 * <p>The stream is consumed following the Lucene workflow
 * ({@code reset -> incrementToken* -> end -> close}); try-with-resources guarantees the
 * stream is closed even when an assertion fails part-way through.
 *
 * @throws IOException if the test analysis cannot be created or the token stream fails
 */
public void testTokenOffset() throws IOException {
    TestAnalysis analysis = createTestAnalysis();
    NamedAnalyzer analyzer = analysis.indexAnalyzers.get("vi_analyzer");
    assertNotNull(analyzer);

    String[] expected = new String[]{"phụ tùng", "xe", "mazda", "bán", "tải", "7", "chỗ", "ống", "dẫn", "gió", "tới", "két", "làm", "mát", "khí", "nạp", "cao su", "lưu hóa", "mới", "100%", "phục vụ", "bhbd", "ms", "1", "d0", "013246", "a"};
    int[] expectedOffset = new int[]{0, 9, 12, 18, 22, 31, 33, 38, 42, 46, 50, 54, 58, 62, 66, 70, 75, 82, 91, 95, 101, 109, 115, 118, 119, 121, 127};

    try (TokenStream ts = analyzer.analyzer().tokenStream("test", "Phụ tùng xe Mazda bán tải dưới 7 chỗ: ống dẫn gió tới két làm mát khí nạp- cao su lưu hóa, mới 100%, phục vụ BHBD. Ms:1D0013246A")) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
        ts.reset();

        for (int i = 0; i < expected.length; i++) {
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term.toString(), equalTo(expected[i]));
            // equalTo reports both values on failure, unlike assertTrue(a == b).
            assertThat(offset.startOffset(), equalTo(expectedOffset[i]));
        }
        assertThat(ts.incrementToken(), equalTo(false));
        // Signal end-of-stream before the implicit close(), as the TokenStream contract requires.
        ts.end();
    }
}
}

0 comments on commit 47dca62

Please sign in to comment.