-
Notifications
You must be signed in to change notification settings - Fork 448
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OPENNLP-1659 - Enhancements for DownloadUtil
- Make the BASE_URL and MODEL_URI_PATH configurable via System properties in order to be backwards compatible if models are removed from the release area of OpenNLP / ASF CDN - Make the OPENNLP_DOWNLOAD_HOME configurable, i.e. for tests in CI environments. This can also be useful for OpenNLP devs to avoid cleaning models in their user home. - Replace hard coded file separators for multi OS support - Log a DEBUG message if the model file already exists and the download is skipped. - Adds a test case to verify that models are not downloaded twice (if they exist) by testing for the related LOG output using LogCaptor. - Replaces slf4j-simple with logback (from LogCaptor) for further testing of specific output
- Loading branch information
Showing
8 changed files
with
312 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
79 changes: 79 additions & 0 deletions
79
opennlp-tools/src/test/java/opennlp/tools/util/AbstractDownloadUtilTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package opennlp.tools.util; | ||
|
||
import java.io.IOException; | ||
import java.net.InetSocketAddress; | ||
import java.net.Socket; | ||
import java.nio.file.DirectoryStream; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
|
||
import org.junit.jupiter.api.BeforeAll; | ||
|
||
import opennlp.tools.EnabledWhenCDNAvailable; | ||
|
||
import static org.junit.jupiter.api.Assertions.fail; | ||
|
||
@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org") | ||
public abstract class AbstractDownloadUtilTest { | ||
|
||
private static final String APACHE_CDN = "dlcdn.apache.org"; | ||
|
||
@BeforeAll | ||
public static void cleanupWhenOnline() { | ||
boolean isOnline; | ||
try (Socket socket = new Socket()) { | ||
socket.connect(new InetSocketAddress(APACHE_CDN, 80), EnabledWhenCDNAvailable.TIMEOUT_MS); | ||
isOnline = true; | ||
} catch (IOException e) { | ||
// Unreachable, unresolvable or timeout | ||
isOnline = false; | ||
} | ||
// If CDN is available -> go cleanup in preparation of the actual tests | ||
if (isOnline) { | ||
wipeExistingModelFiles("-tokens-"); | ||
wipeExistingModelFiles("-sentence-"); | ||
wipeExistingModelFiles("-pos-"); | ||
wipeExistingModelFiles("-lemma-"); | ||
} | ||
} | ||
|
||
|
||
/* | ||
* Helper method that wipes out mode files if they exist on the text execution env. | ||
* Those model files are wiped from a hidden '.opennlp' subdirectory. | ||
* | ||
* Thereby, a clean download can be guaranteed - ín CDN is available and test are executed. | ||
*/ | ||
private static void wipeExistingModelFiles(final String fragment) { | ||
final Path dir = Paths.get(System.getProperty("OPENNLP_DOWNLOAD_HOME", | ||
System.getProperty("user.home"))).resolve(".opennlp"); | ||
if (Files.exists(dir)) { | ||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, "*opennlp-*" + fragment + "*")) { | ||
for (Path modelFileToWipe : stream) { | ||
Files.deleteIfExists(modelFileToWipe); | ||
} | ||
} catch (IOException e) { | ||
fail(e.getLocalizedMessage()); | ||
} | ||
} | ||
} | ||
|
||
} |
97 changes: 97 additions & 0 deletions
97
opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilDownloadTwiceTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package opennlp.tools.util; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
import ch.qos.logback.classic.Level; | ||
import ch.qos.logback.classic.Logger; | ||
import ch.qos.logback.classic.LoggerContext; | ||
import nl.altindag.log.LogCaptor; | ||
import org.junit.jupiter.api.AfterAll; | ||
import org.junit.jupiter.api.BeforeAll; | ||
import org.junit.jupiter.api.Test; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import opennlp.tools.EnabledWhenCDNAvailable; | ||
import opennlp.tools.sentdetect.SentenceModel; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org") | ||
public class DownloadUtilDownloadTwiceTest extends AbstractDownloadUtilTest { | ||
|
||
/* | ||
* Programmatic change to debug log to ensure that we can see log messages to | ||
* confirm no duplicate download is happening | ||
*/ | ||
@BeforeAll | ||
public static void prepare() { | ||
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); | ||
Logger logger = context.getLogger("opennlp"); | ||
logger.setLevel(Level.DEBUG); | ||
} | ||
|
||
/* | ||
* Programmatic restore the default log level (= OFF) after the test | ||
*/ | ||
@AfterAll | ||
public static void cleanup() { | ||
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); | ||
Logger logger = context.getLogger("opennlp"); | ||
logger.setLevel(Level.OFF); | ||
} | ||
|
||
@Test | ||
public void testDownloadModelTwice() throws IOException { | ||
try (LogCaptor logCaptor = LogCaptor.forClass(DownloadUtil.class)) { | ||
|
||
DownloadUtil.downloadModel("de", | ||
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class); | ||
|
||
assertEquals(2, logCaptor.getDebugLogs().size()); | ||
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "Download complete."); | ||
logCaptor.clearLogs(); | ||
|
||
// try to download again | ||
DownloadUtil.downloadModel("de", | ||
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class); | ||
assertEquals(1, logCaptor.getDebugLogs().size()); | ||
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already exists. Skipping download."); | ||
logCaptor.clearLogs(); | ||
|
||
DownloadUtil.downloadModel("de", | ||
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class); | ||
assertEquals(1, logCaptor.getDebugLogs().size()); | ||
checkDebugLogsContainMessageFragment(logCaptor.getDebugLogs(), "already exists. Skipping download."); | ||
logCaptor.clearLogs(); | ||
|
||
} | ||
} | ||
|
||
private void checkDebugLogsContainMessageFragment(List<String> debugLogs, String message) { | ||
for (String log : debugLogs) { | ||
if (log.contains(message)) { | ||
return; | ||
} | ||
} | ||
throw new AssertionError("Expected message fragment not found in logs: " + message); | ||
} | ||
|
||
} |
Oops, something went wrong.