OPENNLP-788: Add initial LanguageDetector interface and Language class
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5a234de7 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5a234de7 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5a234de7 Branch: refs/heads/LangDetect Commit: 5a234de70fb18752ab0e65cf01f25aa4895bc051 Parents: 911d59f Author: William D C M SILVA <[email protected]> Authored: Wed Feb 15 10:03:28 2017 -0200 Committer: William D C M SILVA <[email protected]> Committed: Wed May 17 12:51:42 2017 -0300 ---------------------------------------------------------------------- .../java/opennlp/tools/langdetect/Language.java | 39 ++++++++++++++++++++ .../tools/langdetect/LanguageDetector.java | 33 +++++++++++++++++ 2 files changed, 72 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/5a234de7/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java new file mode 100644 index 0000000..773201f --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.langdetect; + +/** + * Class for holding the document language and its confidence + */ +public class Language { + private final String lang; + private final double confidence; + + public Language(String lang, double confidence) { + this.lang = lang; + this.confidence = confidence; + } + + public String getLang() { + return lang; + } + + public double getConfidence() { + return confidence; + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/5a234de7/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java new file mode 100644 index 0000000..ca897fd --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.langdetect; + +import java.util.Set; + +/** + * The interface for language detectors which detect the language(s) of a given text. + */ +public interface LanguageDetector { + + Language[] detectLanguage(CharSequence content); + + Set<String> getSupportedLanguages(); + + String getLanguageCoding(); + +}
