-
Notifications
You must be signed in to change notification settings - Fork 448
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OPENNLP-1650 Update DownloadUtil to use Models release 1.2
- adapts DownloadUtil, related classes and tests to Models 1.2 - updates index.html in opennlp/tools/util to the latest Models 1.2 data for DownloadParserTest - introduces DownloadUtil.ModelType#LEMMATIZER, as lemmatizer models are now available - adds LemmatizerModelLoaderIT - extracts some copy-and-pasted strings into constants - fixes broken JavaDoc in PerceptronTrainer along the way
- Loading branch information
Showing
12 changed files
with
676 additions
and
400 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
opennlp-tools/src/test/java/opennlp/tools/cmdline/lemmatizer/LemmatizerModelLoaderIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package opennlp.tools.cmdline.lemmatizer; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
|
||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.BeforeAll; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.ValueSource; | ||
|
||
import opennlp.tools.AbstractModelLoaderTest; | ||
import opennlp.tools.EnabledWhenCDNAvailable; | ||
import opennlp.tools.lemmatizer.LemmatizerModel; | ||
import opennlp.tools.util.DownloadUtil; | ||
|
||
@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org") | ||
public class LemmatizerModelLoaderIT extends AbstractModelLoaderTest { | ||
|
||
// SUT | ||
private LemmatizerModelLoader loader; | ||
|
||
@BeforeAll | ||
public static void initResources() { | ||
SUPPORTED_LANG_CODES.forEach(lang -> { | ||
try { | ||
DownloadUtil.downloadModel(lang, DownloadUtil.ModelType.LEMMATIZER, LemmatizerModel.class); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
}); | ||
} | ||
|
||
@BeforeEach | ||
public void setup() { | ||
loader = new LemmatizerModelLoader(); | ||
} | ||
|
||
@ParameterizedTest(name = "Verify \"{0}\" tokenizer model loading") | ||
@ValueSource(strings = {"en-ud-ewt", "fr-ud-gsd", "de-ud-gsd", "it-ud-vit", "nl-ud-alpino", | ||
"bg-ud-btb", "ca-ud-ancora", "cs-ud-pdt", "da-ud-ddt", "el-ud-gdt", "es-ud-gsd", "et-ud-edt", | ||
"eu-ud-bdt", "fi-ud-tdt", "hr-ud-set", "hy-ud-bsut", "is-ud-icepahc", "ka-ud-glc", "kk-ud-ktb", | ||
"ko-ud-kaist", "lv-ud-lvtb", "no-ud-bokmaal", "pl-ud-pdb", "pt-ud-gsd", "ro-ud-rrt", "ru-ud-gsd", | ||
"sr-ud-set", "sk-ud-snk", "sl-ud-ssj", "sv-ud-talbanken", "tr-ud-boun", "uk-ud-iu"}) | ||
public void testLoadModelByLanguage(String langModel) throws IOException { | ||
String modelName = "opennlp-" + langModel + "-lemmas-" + VER + BIN; | ||
LemmatizerModel model = loader.loadModel(Files.newInputStream(OPENNLP_DIR.resolve(modelName))); | ||
Assertions.assertNotNull(model); | ||
Assertions.assertTrue(model.isLoadedFromSerialized()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.