This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch experimental/download_util_enhancements in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 5ecc81eabc0bec77a96efdef2b96007d724996d4 Author: Richard Zowalla <[email protected]> AuthorDate: Tue Nov 26 20:10:16 2024 +0100 x --- .../main/java/opennlp/tools/util/DownloadUtil.java | 34 ++++++----- .../java/opennlp/tools/util/DownloadUtilTest.java | 65 +++++++++++----------- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java index 76fb7bc6..57895c38 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/DownloadUtil.java @@ -18,6 +18,7 @@ package opennlp.tools.util; import java.io.BufferedReader; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -32,6 +33,7 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collections; import java.util.Formatter; import java.util.HashMap; import java.util.List; @@ -75,15 +77,7 @@ public class DownloadUtil { private static final String BASE_URL = "https://dlcdn.apache.org/opennlp/"; private static final String MODELS_UD_MODELS_1_2 = "models/ud-models-1.2/"; - public static final Map<String, Map<ModelType, String>> available_models; - - static { - try { - available_models = new DownloadParser(new URL(BASE_URL + MODELS_UD_MODELS_1_2)).getAvailableModels(); - } catch (MalformedURLException e) { - throw new RuntimeException(e); - } - } + private static Map<String, Map<ModelType, String>> availableModels; /** * Triggers a download for the specified {@link DownloadUtil.ModelType}. @@ -98,8 +92,8 @@ public class DownloadUtil { public static <T extends BaseModel> T downloadModel(String language, ModelType modelType, Class<T> type) throws IOException { - if (available_models.containsKey(language)) { - final String url = (available_models.get(language).get(modelType)); + if (getAvailableModels().containsKey(language)) { + final String url = (getAvailableModels().get(language).get(modelType)); if (url != null) { return downloadModel(new URL(url), type); } @@ -124,12 +118,12 @@ public class DownloadUtil { */ public static <T extends BaseModel> T downloadModel(URL url, Class<T> type) throws IOException { - final Path homeDirectory = Paths.get(System.getProperty("user.home") + "/.opennlp/"); + final Path homeDirectory = Paths.get(System.getProperty("user.home")).resolve(".opennlp"); if (!Files.isDirectory(homeDirectory)) { homeDirectory.toFile().mkdir(); } - final String filename = url.toString().substring(url.toString().lastIndexOf("/") + 1); + final String filename = url.toString().substring(url.toString().lastIndexOf(File.separator) + 1); final Path localFile = Paths.get(homeDirectory.toString(), filename); if (!Files.exists(localFile)) { @@ -141,8 +135,9 @@ public class DownloadUtil { validateModel(new URL(url + ".sha512"), localFile); - logger.debug("Download complete."); + } else { + logger.debug("Model file '{}' already exists. Skipping download.", filename); } try { @@ -152,6 +147,17 @@ public class DownloadUtil { } } + public static Map<String, Map<ModelType, String>> getAvailableModels() { + if(availableModels == null) { + try { + availableModels = new DownloadParser(new URL(BASE_URL + MODELS_UD_MODELS_1_2)).getAvailableModels(); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + return Collections.unmodifiableMap(availableModels); + } + /** * Validates the downloaded model. * diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java index 6ab0aa4c..ae6f13e3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/DownloadUtilTest.java @@ -104,6 +104,7 @@ public class DownloadUtilTest { @EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org") public void testDownloadModelByURL(String language, URL url) throws IOException { TokenizerModel model = DownloadUtil.downloadModel(url, TokenizerModel.class); + System.out.println(url); assertNotNull(model); assertEquals(language, model.getLanguage()); assertTrue(model.isLoadedFromSerialized()); @@ -123,38 +124,38 @@ public class DownloadUtilTest { // Note: This needs to be public as JUnit 5 requires it like this. public static Stream<Arguments> provideURLs() { return Stream.of( - Arguments.of("en", DownloadUtil.available_models.get("en").get(MT_TOKENIZER)), - Arguments.of("fr", DownloadUtil.available_models.get("fr").get(MT_TOKENIZER)), - Arguments.of("de", DownloadUtil.available_models.get("de").get(MT_TOKENIZER)), - Arguments.of("it", DownloadUtil.available_models.get("it").get(MT_TOKENIZER)), - Arguments.of("nl", DownloadUtil.available_models.get("nl").get(MT_TOKENIZER)), - Arguments.of("bg", DownloadUtil.available_models.get("bg").get(MT_TOKENIZER)), - Arguments.of("ca", DownloadUtil.available_models.get("ca").get(MT_TOKENIZER)), - Arguments.of("cs", DownloadUtil.available_models.get("cs").get(MT_TOKENIZER)), - Arguments.of("da", DownloadUtil.available_models.get("da").get(MT_TOKENIZER)), - Arguments.of("el", DownloadUtil.available_models.get("el").get(MT_TOKENIZER)), - Arguments.of("es", DownloadUtil.available_models.get("es").get(MT_TOKENIZER)), - Arguments.of("et", DownloadUtil.available_models.get("et").get(MT_TOKENIZER)), - Arguments.of("eu", DownloadUtil.available_models.get("eu").get(MT_TOKENIZER)), - Arguments.of("fi", DownloadUtil.available_models.get("fi").get(MT_TOKENIZER)), - Arguments.of("hr", DownloadUtil.available_models.get("hr").get(MT_TOKENIZER)), - Arguments.of("hy", DownloadUtil.available_models.get("hy").get(MT_TOKENIZER)), - Arguments.of("is", DownloadUtil.available_models.get("is").get(MT_TOKENIZER)), - Arguments.of("ka", DownloadUtil.available_models.get("ka").get(MT_TOKENIZER)), - Arguments.of("kk", DownloadUtil.available_models.get("kk").get(MT_TOKENIZER)), - Arguments.of("ko", DownloadUtil.available_models.get("ko").get(MT_TOKENIZER)), - Arguments.of("lv", DownloadUtil.available_models.get("lv").get(MT_TOKENIZER)), - Arguments.of("no", DownloadUtil.available_models.get("no").get(MT_TOKENIZER)), - Arguments.of("pl", DownloadUtil.available_models.get("pl").get(MT_TOKENIZER)), - Arguments.of("pt", DownloadUtil.available_models.get("pt").get(MT_TOKENIZER)), - Arguments.of("ro", DownloadUtil.available_models.get("ro").get(MT_TOKENIZER)), - Arguments.of("ru", DownloadUtil.available_models.get("ru").get(MT_TOKENIZER)), - Arguments.of("sk", DownloadUtil.available_models.get("sk").get(MT_TOKENIZER)), - Arguments.of("sl", DownloadUtil.available_models.get("sl").get(MT_TOKENIZER)), - Arguments.of("sr", DownloadUtil.available_models.get("sr").get(MT_TOKENIZER)), - Arguments.of("sv", DownloadUtil.available_models.get("sv").get(MT_TOKENIZER)), - Arguments.of("tr", DownloadUtil.available_models.get("tr").get(MT_TOKENIZER)), - Arguments.of("uk", DownloadUtil.available_models.get("uk").get(MT_TOKENIZER)) + Arguments.of("en", DownloadUtil.getAvailableModels().get("en").get(MT_TOKENIZER)), + Arguments.of("fr", DownloadUtil.getAvailableModels().get("fr").get(MT_TOKENIZER)), + Arguments.of("de", DownloadUtil.getAvailableModels().get("de").get(MT_TOKENIZER)), + Arguments.of("it", DownloadUtil.getAvailableModels().get("it").get(MT_TOKENIZER)), + Arguments.of("nl", DownloadUtil.getAvailableModels().get("nl").get(MT_TOKENIZER)), + Arguments.of("bg", DownloadUtil.getAvailableModels().get("bg").get(MT_TOKENIZER)), + Arguments.of("ca", DownloadUtil.getAvailableModels().get("ca").get(MT_TOKENIZER)), + Arguments.of("cs", DownloadUtil.getAvailableModels().get("cs").get(MT_TOKENIZER)), + Arguments.of("da", DownloadUtil.getAvailableModels().get("da").get(MT_TOKENIZER)), + Arguments.of("el", DownloadUtil.getAvailableModels().get("el").get(MT_TOKENIZER)), + Arguments.of("es", DownloadUtil.getAvailableModels().get("es").get(MT_TOKENIZER)), + Arguments.of("et", DownloadUtil.getAvailableModels().get("et").get(MT_TOKENIZER)), + Arguments.of("eu", DownloadUtil.getAvailableModels().get("eu").get(MT_TOKENIZER)), + Arguments.of("fi", DownloadUtil.getAvailableModels().get("fi").get(MT_TOKENIZER)), + Arguments.of("hr", DownloadUtil.getAvailableModels().get("hr").get(MT_TOKENIZER)), + Arguments.of("hy", DownloadUtil.getAvailableModels().get("hy").get(MT_TOKENIZER)), + Arguments.of("is", DownloadUtil.getAvailableModels().get("is").get(MT_TOKENIZER)), + Arguments.of("ka", DownloadUtil.getAvailableModels().get("ka").get(MT_TOKENIZER)), + Arguments.of("kk", DownloadUtil.getAvailableModels().get("kk").get(MT_TOKENIZER)), + Arguments.of("ko", DownloadUtil.getAvailableModels().get("ko").get(MT_TOKENIZER)), + Arguments.of("lv", DownloadUtil.getAvailableModels().get("lv").get(MT_TOKENIZER)), + Arguments.of("no", DownloadUtil.getAvailableModels().get("no").get(MT_TOKENIZER)), + Arguments.of("pl", DownloadUtil.getAvailableModels().get("pl").get(MT_TOKENIZER)), + Arguments.of("pt", DownloadUtil.getAvailableModels().get("pt").get(MT_TOKENIZER)), + Arguments.of("ro", DownloadUtil.getAvailableModels().get("ro").get(MT_TOKENIZER)), + Arguments.of("ru", DownloadUtil.getAvailableModels().get("ru").get(MT_TOKENIZER)), + Arguments.of("sk", DownloadUtil.getAvailableModels().get("sk").get(MT_TOKENIZER)), + Arguments.of("sl", DownloadUtil.getAvailableModels().get("sl").get(MT_TOKENIZER)), + Arguments.of("sr", DownloadUtil.getAvailableModels().get("sr").get(MT_TOKENIZER)), + Arguments.of("sv", DownloadUtil.getAvailableModels().get("sv").get(MT_TOKENIZER)), + Arguments.of("tr", DownloadUtil.getAvailableModels().get("tr").get(MT_TOKENIZER)), + Arguments.of("uk", DownloadUtil.getAvailableModels().get("uk").get(MT_TOKENIZER)) ); } }
