[ https://issues.apache.org/jira/browse/OPENNLP-1416?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17649612#comment-17649612 ]
ASF GitHub Bot commented on OPENNLP-1416: ----------------------------------------- rzo1 commented on code in PR #461: URL: https://github.com/apache/opennlp/pull/461#discussion_r1052995489 ########## opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java: ########## @@ -154,61 +153,49 @@ public class ADNameSampleStream implements ObjectStream<NameSample> { private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream; - /** + /* * To keep the last left contraction part */ private String leftContractionPart = null; private final boolean splitHyphenatedTokens; /** - * Creates a new {@link NameSample} stream from a line stream, i.e. - * {@link ObjectStream}<{@link String}>, that could be a - * {@link PlainTextByLineStream} object. + * Initializes a new {@link ADNameSampleStream} stream from a {@link ObjectStream<String>}, + * that could be a {@link PlainTextByLineStream} object. * - * @param lineStream - * a stream of lines as {@link String} - * @param splitHyphenatedTokens - * if true hyphenated tokens will be separated: "carros-monstro" > - * "carros" "-" "monstro" + * @param lineStream An {@link ObjectStream<String>} as input. + * @param splitHyphenatedTokens If {@code true} hyphenated tokens will be separated: + * "carros-monstro" > "carros" "-" "monstro". */ public ADNameSampleStream(ObjectStream<String> lineStream, boolean splitHyphenatedTokens) { this.adSentenceStream = new ADSentenceStream(lineStream); this.splitHyphenatedTokens = splitHyphenatedTokens; } /** - * Creates a new {@link NameSample} stream from a {@link InputStream} + * Initializes a new {@link ADNameSampleStream} from an {@link InputStreamFactory} * - * @param in - * the Corpus {@link InputStream} - * @param charsetName - * the charset of the Arvores Deitadas Corpus - * @param splitHyphenatedTokens - * if true hyphenated tokens will be separated: "carros-monstro" > - * "carros" "-" "monstro" + * @param in The Corpus {@link InputStreamFactory}. 
+ * @param charsetName The {@link java.nio.charset.Charset charset} to use + * for reading of the corpus. + * @param splitHyphenatedTokens If {@code true} hyphenated tokens will be separated: + * "carros-monstro" > "carros" "-" "monstro". */ @Deprecated public ADNameSampleStream(InputStreamFactory in, String charsetName, boolean splitHyphenatedTokens) throws IOException { - - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( - in, charsetName)); - this.splitHyphenatedTokens = splitHyphenatedTokens; - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } + this(new PlainTextByLineStream(in, charsetName), splitHyphenatedTokens); } private int textID = -1; + @Override public NameSample read() throws IOException { Sentence paragraph; // we should look for text here. - while ((paragraph = this.adSentenceStream.read()) != null) { + if ((paragraph = this.adSentenceStream.read()) != null) { Review Comment: That looks like the `while` was a bug previously (emptying the whole underlying stream...) - good catch! ########## opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java: ########## @@ -64,35 +59,26 @@ public ADPOSSampleStream(ObjectStream<String> lineStream, boolean expandME, } /** - * Creates a new {@link POSSample} stream from a {@link InputStream} + * Creates a new {@link POSSample} stream from an {@link InputStreamFactory} * - * @param in - * the Corpus {@link InputStream} - * @param charsetName - * the charset of the Arvores Deitadas Corpus - * @param expandME - * if true will expand the multiword expressions, each word of the + * @param in The {@link InputStreamFactory} for the corpus. + * @param charsetName The {@link java.nio.charset.Charset charset} to use + * for reading of the corpus. 
+ * @param expandME If {@code true} will expand the multiword expressions, each word of the * expression will have the POS Tag that was attributed to the - * expression plus the prefix B- or I- (CONLL convention) - * @param includeFeatures - * if true will combine the POS Tag with the feature tags + * expression plus the prefix {@code B-} or {@code I-} (CONLL convention). + * @param includeFeatures If {@code true} will combine the POS Tag with the feature tags. */ public ADPOSSampleStream(InputStreamFactory in, String charsetName, boolean expandME, boolean includeFeatures) throws IOException { - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(in, charsetName)); - this.expandME = expandME; - this.isIncludeFeatures = includeFeatures; - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); - } + this(new PlainTextByLineStream(in, charsetName), expandME, includeFeatures); } + @Override public POSSample read() throws IOException { Sentence paragraph; - while ((paragraph = this.adSentenceStream.read()) != null) { + if ((paragraph = this.adSentenceStream.read()) != null) { Review Comment: :-) > Enhance JavaDoc in opennlp.tools.formats.ad package > --------------------------------------------------- > > Key: OPENNLP-1416 > URL: https://issues.apache.org/jira/browse/OPENNLP-1416 > Project: OpenNLP > Issue Type: Improvement > Components: Formats > Affects Versions: 2.1.0 > Reporter: Martin Wiesner > Assignee: Martin Wiesner > Priority: Minor > Fix For: 2.1.1 > > > The JavaDoc of the _opennlp.tools.formats.ad_ package suffers from several > inconsistencies and missing descriptions. Moreover, several typos are present > that need fixing. > It needs enhancements and/or additions to provide more clarity for readers. -- This message was sent by Atlassian Jira (v8.20.10#820010)