[
https://issues.apache.org/jira/browse/DRILL-8457?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17781042#comment-17781042
]
ASF GitHub Bot commented on DRILL-8457:
---------------------------------------
cgivre commented on code in PR #2840:
URL: https://github.com/apache/drill/pull/2840#discussion_r1376443490
##########
contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java:
##########
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.http;
+
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
+
+import java.util.Objects;
+
+@JsonInclude(JsonInclude.Include.NON_DEFAULT)
+@JsonDeserialize(builder = HttpCSVOptions.HttpCSVOptionsBuilder.class)
+public class HttpCSVOptions {
+
+
+ @JsonProperty
+ private final String delimiter;
+
+ @JsonProperty
+ private final char quote;
+
+ @JsonProperty
+ private final char quoteEscape;
+
+ @JsonProperty
+ private final String lineSeparator;
+
+ @JsonProperty
+ private final Boolean headerExtractionEnabled;
+
+ @JsonProperty
+ private final long numberOfRowsToSkip;
+
+ @JsonProperty
+ private final long numberOfRecordsToRead;
+
+ @JsonProperty
+ private final boolean lineSeparatorDetectionEnabled;
+
+ @JsonProperty
+ private final int maxColumns;
+
+ @JsonProperty
+ private final int maxCharsPerColumn;
+
+ @JsonProperty
+ private final boolean skipEmptyLines;
+
+ @JsonProperty
+ private final boolean ignoreLeadingWhitespaces;
+
+ @JsonProperty
+ private final boolean ignoreTrailingWhitespaces;
+
+ @JsonProperty
+ private final String nullValue;
+
+ HttpCSVOptions(HttpCSVOptionsBuilder builder) {
+ this.delimiter = builder.delimiter;
+ this.quote = builder.quote;
+ this.quoteEscape = builder.quoteEscape;
+ this.lineSeparator = builder.lineSeparator;
+ this.headerExtractionEnabled = builder.headerExtractionEnabled;
+ this.numberOfRowsToSkip = builder.numberOfRowsToSkip;
+ this.numberOfRecordsToRead = builder.numberOfRecordsToRead;
+ this.lineSeparatorDetectionEnabled = builder.lineSeparatorDetectionEnabled;
+ this.maxColumns = builder.maxColumns;
+ this.maxCharsPerColumn = builder.maxCharsPerColumn;
+ this.skipEmptyLines = builder.skipEmptyLines;
+ this.ignoreLeadingWhitespaces = builder.ignoreLeadingWhitespaces;
+ this.ignoreTrailingWhitespaces = builder.ignoreTrailingWhitespaces;
+ this.nullValue = builder.nullValue;
+ }
+
+ public static HttpCSVOptionsBuilder builder() {
+ return new HttpCSVOptionsBuilder();
+ }
+
+ public String getDelimiter() {
+ return delimiter;
+ }
+
+ public char getQuote() {
+ return quote;
+ }
+
+ public char getQuoteEscape() {
+ return quoteEscape;
+ }
+
+ public String getLineSeparator() {
+ return lineSeparator;
+ }
+
+ public Boolean getHeaderExtractionEnabled() {
+ return headerExtractionEnabled;
+ }
+
+ public long getNumberOfRowsToSkip() {
+ return numberOfRowsToSkip;
+ }
+
+ public long getNumberOfRecordsToRead() {
+ return numberOfRecordsToRead;
+ }
+
+ public boolean isLineSeparatorDetectionEnabled() {
+ return lineSeparatorDetectionEnabled;
+ }
+
+ public int getMaxColumns() {
+ return maxColumns;
+ }
+
+ public int getMaxCharsPerColumn() {
+ return maxCharsPerColumn;
+ }
+
+ public boolean isSkipEmptyLines() {
+ return skipEmptyLines;
+ }
+
+ public boolean isIgnoreLeadingWhitespaces() {
+ return ignoreLeadingWhitespaces;
+ }
+
+ public boolean isIgnoreTrailingWhitespaces() {
+ return ignoreTrailingWhitespaces;
+ }
+
+ public String getNullValue() {
+ return nullValue;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ HttpCSVOptions that = (HttpCSVOptions) o;
+ return quote == that.quote && quoteEscape == that.quoteEscape &&
numberOfRowsToSkip == that.numberOfRowsToSkip && numberOfRecordsToRead ==
that.numberOfRecordsToRead && lineSeparatorDetectionEnabled ==
that.lineSeparatorDetectionEnabled && maxColumns == that.maxColumns &&
maxCharsPerColumn == that.maxCharsPerColumn && skipEmptyLines ==
that.skipEmptyLines && ignoreLeadingWhitespaces ==
that.ignoreLeadingWhitespaces && ignoreTrailingWhitespaces ==
that.ignoreTrailingWhitespaces && delimiter.equals(that.delimiter) &&
lineSeparator.equals(that.lineSeparator) &&
Objects.equals(headerExtractionEnabled, that.headerExtractionEnabled) &&
nullValue.equals(that.nullValue);
Review Comment:
Nit: Please break this up into new lines.
##########
contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpCSVOptions.java:
##########
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.http;
+
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
+
+import java.util.Objects;
+
+@JsonInclude(JsonInclude.Include.NON_DEFAULT)
+@JsonDeserialize(builder = HttpCSVOptions.HttpCSVOptionsBuilder.class)
+public class HttpCSVOptions {
+
+
+ @JsonProperty
+ private final String delimiter;
+
+ @JsonProperty
+ private final char quote;
+
+ @JsonProperty
+ private final char quoteEscape;
+
+ @JsonProperty
+ private final String lineSeparator;
+
+ @JsonProperty
+ private final Boolean headerExtractionEnabled;
+
+ @JsonProperty
+ private final long numberOfRowsToSkip;
+
+ @JsonProperty
+ private final long numberOfRecordsToRead;
+
+ @JsonProperty
+ private final boolean lineSeparatorDetectionEnabled;
+
+ @JsonProperty
+ private final int maxColumns;
+
+ @JsonProperty
+ private final int maxCharsPerColumn;
+
+ @JsonProperty
+ private final boolean skipEmptyLines;
+
+ @JsonProperty
+ private final boolean ignoreLeadingWhitespaces;
+
+ @JsonProperty
+ private final boolean ignoreTrailingWhitespaces;
+
+ @JsonProperty
+ private final String nullValue;
+
+ HttpCSVOptions(HttpCSVOptionsBuilder builder) {
+ this.delimiter = builder.delimiter;
+ this.quote = builder.quote;
+ this.quoteEscape = builder.quoteEscape;
+ this.lineSeparator = builder.lineSeparator;
+ this.headerExtractionEnabled = builder.headerExtractionEnabled;
+ this.numberOfRowsToSkip = builder.numberOfRowsToSkip;
+ this.numberOfRecordsToRead = builder.numberOfRecordsToRead;
+ this.lineSeparatorDetectionEnabled = builder.lineSeparatorDetectionEnabled;
+ this.maxColumns = builder.maxColumns;
+ this.maxCharsPerColumn = builder.maxCharsPerColumn;
+ this.skipEmptyLines = builder.skipEmptyLines;
+ this.ignoreLeadingWhitespaces = builder.ignoreLeadingWhitespaces;
+ this.ignoreTrailingWhitespaces = builder.ignoreTrailingWhitespaces;
+ this.nullValue = builder.nullValue;
+ }
+
+ public static HttpCSVOptionsBuilder builder() {
+ return new HttpCSVOptionsBuilder();
+ }
+
+ public String getDelimiter() {
+ return delimiter;
+ }
+
+ public char getQuote() {
+ return quote;
+ }
+
+ public char getQuoteEscape() {
+ return quoteEscape;
+ }
+
+ public String getLineSeparator() {
+ return lineSeparator;
+ }
+
+ public Boolean getHeaderExtractionEnabled() {
+ return headerExtractionEnabled;
+ }
+
+ public long getNumberOfRowsToSkip() {
+ return numberOfRowsToSkip;
+ }
+
+ public long getNumberOfRecordsToRead() {
+ return numberOfRecordsToRead;
+ }
+
+ public boolean isLineSeparatorDetectionEnabled() {
+ return lineSeparatorDetectionEnabled;
+ }
+
+ public int getMaxColumns() {
+ return maxColumns;
+ }
+
+ public int getMaxCharsPerColumn() {
+ return maxCharsPerColumn;
+ }
+
+ public boolean isSkipEmptyLines() {
+ return skipEmptyLines;
+ }
+
+ public boolean isIgnoreLeadingWhitespaces() {
+ return ignoreLeadingWhitespaces;
+ }
+
+ public boolean isIgnoreTrailingWhitespaces() {
+ return ignoreTrailingWhitespaces;
+ }
+
+ public String getNullValue() {
+ return nullValue;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ HttpCSVOptions that = (HttpCSVOptions) o;
+ return quote == that.quote && quoteEscape == that.quoteEscape &&
numberOfRowsToSkip == that.numberOfRowsToSkip && numberOfRecordsToRead ==
that.numberOfRecordsToRead && lineSeparatorDetectionEnabled ==
that.lineSeparatorDetectionEnabled && maxColumns == that.maxColumns &&
maxCharsPerColumn == that.maxCharsPerColumn && skipEmptyLines ==
that.skipEmptyLines && ignoreLeadingWhitespaces ==
that.ignoreLeadingWhitespaces && ignoreTrailingWhitespaces ==
that.ignoreTrailingWhitespaces && delimiter.equals(that.delimiter) &&
lineSeparator.equals(that.lineSeparator) &&
Objects.equals(headerExtractionEnabled, that.headerExtractionEnabled) &&
nullValue.equals(that.nullValue);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(delimiter, quote, quoteEscape, lineSeparator,
headerExtractionEnabled,
+ numberOfRowsToSkip, numberOfRecordsToRead,
lineSeparatorDetectionEnabled, maxColumns,
+ maxCharsPerColumn, skipEmptyLines, ignoreLeadingWhitespaces,
ignoreTrailingWhitespaces,
+ nullValue);
+ }
+
+ @Override
+ public String toString() {
+ return "HttpCSVOptions{" + "delimiter='" + delimiter + '\'' + ", quote=" +
quote + ", " +
Review Comment:
Nit: Please use the `PlanStringBuilder` for the `toString()` method.
> Allow configuring csv parser in http storage plugin configuration
> -----------------------------------------------------------------
>
> Key: DRILL-8457
> URL: https://issues.apache.org/jira/browse/DRILL-8457
> Project: Apache Drill
> Issue Type: Improvement
> Components: Storage - HTTP
> Affects Versions: Future
> Reporter: Zbigniew Tomanek
> Priority: Minor
> Fix For: Future
>
>
> Currently there is no way to configure the CSV parser when the HTTP plugin is used.
> Because of that, some kinds of files cannot be parsed (e.g. when any column has
> more than 4096 chars or the file has a delimiter other than `,`).
> Since we use the HTTP plugin quite often at DataWalk, we've changed our
> internal fork of Drill so that the following parser/format properties can be
> configured using an additional `csvOptions` field:
>
> {code:json}
> {
> "csvOptions": {
> "delimiter": "\t",
> "quote": "\"",
> "quote_escape": "\"",
> "line_separator": "\n",
> "header_extraction_enabled": null,
> "number_of_rows_to_skip": 0,
> "number_of_records_to_read": -1,
> "line_separator_detection_enabled": true,
> "max_columns": 512,
> "max_chars_per_column": 4096,
> "skip_empty_lines": true,
> "ignore_leading_whitespaces": true,
> "ignore_trailing_whitespaces": true,
> "null_value": null
> }
> }{code}
> I'd be glad to get feedback on whether creating a PR with these changes would
> bring any value to Drill.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)