[ https://issues.apache.org/jira/browse/DRILL-8457?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17781042#comment-17781042 ]
ASF GitHub Bot commented on DRILL-8457: --------------------------------------- cgivre commented on code in PR #2840: URL: https://github.com/apache/drill/pull/2840#discussion_r1376443490 ########## contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java: ########## @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.exec.store.http; + + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; + +import java.util.Objects; + +@JsonInclude(JsonInclude.Include.NON_DEFAULT) +@JsonDeserialize(builder = HttpCSVOptions.HttpCSVOptionsBuilder.class) +public class HttpCSVOptions { + + + @JsonProperty + private final String delimiter; + + @JsonProperty + private final char quote; + + @JsonProperty + private final char quoteEscape; + + @JsonProperty + private final String lineSeparator; + + @JsonProperty + private final Boolean headerExtractionEnabled; + + @JsonProperty + private final long numberOfRowsToSkip; + + @JsonProperty + private final long numberOfRecordsToRead; + + @JsonProperty + private final boolean lineSeparatorDetectionEnabled; + + @JsonProperty + private final int maxColumns; + + @JsonProperty + private final int maxCharsPerColumn; + + @JsonProperty + private final boolean skipEmptyLines; + + @JsonProperty + private final boolean ignoreLeadingWhitespaces; + + @JsonProperty + private final boolean ignoreTrailingWhitespaces; + + @JsonProperty + private final String nullValue; + + HttpCSVOptions(HttpCSVOptionsBuilder builder) { + this.delimiter = builder.delimiter; + this.quote = builder.quote; + this.quoteEscape = builder.quoteEscape; + this.lineSeparator = builder.lineSeparator; + this.headerExtractionEnabled = builder.headerExtractionEnabled; + this.numberOfRowsToSkip = builder.numberOfRowsToSkip; + this.numberOfRecordsToRead = builder.numberOfRecordsToRead; + this.lineSeparatorDetectionEnabled = builder.lineSeparatorDetectionEnabled; + this.maxColumns = builder.maxColumns; + this.maxCharsPerColumn = builder.maxCharsPerColumn; + this.skipEmptyLines = builder.skipEmptyLines; + this.ignoreLeadingWhitespaces = builder.ignoreLeadingWhitespaces; + 
this.ignoreTrailingWhitespaces = builder.ignoreTrailingWhitespaces; + this.nullValue = builder.nullValue; + } + + public static HttpCSVOptionsBuilder builder() { + return new HttpCSVOptionsBuilder(); + } + + public String getDelimiter() { + return delimiter; + } + + public char getQuote() { + return quote; + } + + public char getQuoteEscape() { + return quoteEscape; + } + + public String getLineSeparator() { + return lineSeparator; + } + + public Boolean getHeaderExtractionEnabled() { + return headerExtractionEnabled; + } + + public long getNumberOfRowsToSkip() { + return numberOfRowsToSkip; + } + + public long getNumberOfRecordsToRead() { + return numberOfRecordsToRead; + } + + public boolean isLineSeparatorDetectionEnabled() { + return lineSeparatorDetectionEnabled; + } + + public int getMaxColumns() { + return maxColumns; + } + + public int getMaxCharsPerColumn() { + return maxCharsPerColumn; + } + + public boolean isSkipEmptyLines() { + return skipEmptyLines; + } + + public boolean isIgnoreLeadingWhitespaces() { + return ignoreLeadingWhitespaces; + } + + public boolean isIgnoreTrailingWhitespaces() { + return ignoreTrailingWhitespaces; + } + + public String getNullValue() { + return nullValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HttpCSVOptions that = (HttpCSVOptions) o; + return quote == that.quote && quoteEscape == that.quoteEscape && numberOfRowsToSkip == that.numberOfRowsToSkip && numberOfRecordsToRead == that.numberOfRecordsToRead && lineSeparatorDetectionEnabled == that.lineSeparatorDetectionEnabled && maxColumns == that.maxColumns && maxCharsPerColumn == that.maxCharsPerColumn && skipEmptyLines == that.skipEmptyLines && ignoreLeadingWhitespaces == that.ignoreLeadingWhitespaces && ignoreTrailingWhitespaces == that.ignoreTrailingWhitespaces && delimiter.equals(that.delimiter) && lineSeparator.equals(that.lineSeparator) && 
Objects.equals(headerExtractionEnabled, that.headerExtractionEnabled) && nullValue.equals(that.nullValue); Review Comment: Nit: Please break this up into new lines. ########## contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java: ########## @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.exec.store.http; + + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; + +import java.util.Objects; + +@JsonInclude(JsonInclude.Include.NON_DEFAULT) +@JsonDeserialize(builder = HttpCSVOptions.HttpCSVOptionsBuilder.class) +public class HttpCSVOptions { + + + @JsonProperty + private final String delimiter; + + @JsonProperty + private final char quote; + + @JsonProperty + private final char quoteEscape; + + @JsonProperty + private final String lineSeparator; + + @JsonProperty + private final Boolean headerExtractionEnabled; + + @JsonProperty + private final long numberOfRowsToSkip; + + @JsonProperty + private final long numberOfRecordsToRead; + + @JsonProperty + private final boolean lineSeparatorDetectionEnabled; + + @JsonProperty + private final int maxColumns; + + @JsonProperty + private final int maxCharsPerColumn; + + @JsonProperty + private final boolean skipEmptyLines; + + @JsonProperty + private final boolean ignoreLeadingWhitespaces; + + @JsonProperty + private final boolean ignoreTrailingWhitespaces; + + @JsonProperty + private final String nullValue; + + HttpCSVOptions(HttpCSVOptionsBuilder builder) { + this.delimiter = builder.delimiter; + this.quote = builder.quote; + this.quoteEscape = builder.quoteEscape; + this.lineSeparator = builder.lineSeparator; + this.headerExtractionEnabled = builder.headerExtractionEnabled; + this.numberOfRowsToSkip = builder.numberOfRowsToSkip; + this.numberOfRecordsToRead = builder.numberOfRecordsToRead; + this.lineSeparatorDetectionEnabled = builder.lineSeparatorDetectionEnabled; + this.maxColumns = builder.maxColumns; + this.maxCharsPerColumn = builder.maxCharsPerColumn; + this.skipEmptyLines = builder.skipEmptyLines; + this.ignoreLeadingWhitespaces = builder.ignoreLeadingWhitespaces; + 
this.ignoreTrailingWhitespaces = builder.ignoreTrailingWhitespaces; + this.nullValue = builder.nullValue; + } + + public static HttpCSVOptionsBuilder builder() { + return new HttpCSVOptionsBuilder(); + } + + public String getDelimiter() { + return delimiter; + } + + public char getQuote() { + return quote; + } + + public char getQuoteEscape() { + return quoteEscape; + } + + public String getLineSeparator() { + return lineSeparator; + } + + public Boolean getHeaderExtractionEnabled() { + return headerExtractionEnabled; + } + + public long getNumberOfRowsToSkip() { + return numberOfRowsToSkip; + } + + public long getNumberOfRecordsToRead() { + return numberOfRecordsToRead; + } + + public boolean isLineSeparatorDetectionEnabled() { + return lineSeparatorDetectionEnabled; + } + + public int getMaxColumns() { + return maxColumns; + } + + public int getMaxCharsPerColumn() { + return maxCharsPerColumn; + } + + public boolean isSkipEmptyLines() { + return skipEmptyLines; + } + + public boolean isIgnoreLeadingWhitespaces() { + return ignoreLeadingWhitespaces; + } + + public boolean isIgnoreTrailingWhitespaces() { + return ignoreTrailingWhitespaces; + } + + public String getNullValue() { + return nullValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HttpCSVOptions that = (HttpCSVOptions) o; + return quote == that.quote && quoteEscape == that.quoteEscape && numberOfRowsToSkip == that.numberOfRowsToSkip && numberOfRecordsToRead == that.numberOfRecordsToRead && lineSeparatorDetectionEnabled == that.lineSeparatorDetectionEnabled && maxColumns == that.maxColumns && maxCharsPerColumn == that.maxCharsPerColumn && skipEmptyLines == that.skipEmptyLines && ignoreLeadingWhitespaces == that.ignoreLeadingWhitespaces && ignoreTrailingWhitespaces == that.ignoreTrailingWhitespaces && delimiter.equals(that.delimiter) && lineSeparator.equals(that.lineSeparator) && 
Objects.equals(headerExtractionEnabled, that.headerExtractionEnabled) && nullValue.equals(that.nullValue); + } + + @Override + public int hashCode() { + return Objects.hash(delimiter, quote, quoteEscape, lineSeparator, headerExtractionEnabled, + numberOfRowsToSkip, numberOfRecordsToRead, lineSeparatorDetectionEnabled, maxColumns, + maxCharsPerColumn, skipEmptyLines, ignoreLeadingWhitespaces, ignoreTrailingWhitespaces, + nullValue); + } + + @Override + public String toString() { + return "HttpCSVOptions{" + "delimiter='" + delimiter + '\'' + ", quote=" + quote + ", " + Review Comment: Nit: Please use the `PlanStringBuilder` for the `toString()` method. > Allow configuring csv parser in http storage plugin configuration > ----------------------------------------------------------------- > > Key: DRILL-8457 > URL: https://issues.apache.org/jira/browse/DRILL-8457 > Project: Apache Drill > Issue Type: Improvement > Components: Storage - HTTP > Affects Versions: Future > Reporter: Zbigniew Tomanek > Priority: Minor > Fix For: Future > > > Currently there is no way to configure the CSV parser when the HTTP plugin is used. > Because of that, some kinds of files cannot be parsed (e.g. when any column has > more than 4096 chars or the file uses a delimiter other than `,`). 
> Since we use the HTTP plugin quite often at DataWalk, we've changed our > internal fork of Drill so that the following parser/format properties can be > configured using an additional `csvOptions` field: > > {code:json} > { > "csvOptions": { > "delimiter": "\t", > "quote": "\"", > "quote_escape": "\"", > "line_separator": "\n", > "header_extraction_enabled": null, > "number_of_rows_to_skip": 0, > "number_of_records_to_read": -1, > "line_separator_detection_enabled": true, > "max_columns": 512, > "max_chars_per_column": 4096, > "skip_empty_lines": true, > "ignore_leading_whitespaces": true, > "ignore_trailing_whitespaces": true, > "null_value": null > } > }{code} > I'd be glad to get feedback on whether creating a PR with these changes would > bring any value to Drill. -- This message was sent by Atlassian Jira (v8.20.10#820010)