This is an automated email from the ASF dual-hosted git repository. progers pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push: new 935936b DRILL-6168: Revise format plugin table functions 935936b is described below commit 935936b87f730f176bfbaabf7e68d7284563dc4b Author: Paul Rogers <par0...@yahoo.com> AuthorDate: Sun Apr 12 00:19:02 2020 -0700 DRILL-6168: Revise format plugin table functions Allows table functions to inherit properties from a defined format plugin. Also DRILL-7612: enforces immutability for all format plugins. --- .../drill/exec/store/esri/ShpFormatConfig.java | 31 ++- .../drill/exec/store/excel/ExcelFormatConfig.java | 76 +++--- .../drill/exec/store/hdf5/HDF5FormatConfig.java | 31 ++- .../drill/exec/store/ltsv/LTSVFormatPlugin.java | 2 +- .../exec/store/ltsv/LTSVFormatPluginConfig.java | 28 ++- .../exec/store/mapr/TableFormatPluginConfig.java | 4 +- .../exec/store/syslog/SyslogFormatConfig.java | 62 ++--- .../drill/exec/store/syslog/TestSyslogFormat.java | 14 +- .../apache/drill/exec/dotdrill/DotDrillFile.java | 15 +- .../logical/ConvertCountToDirectScanRule.java | 2 +- .../drill/exec/planner/logical/DrillTable.java | 48 +--- .../exec/planner/logical/DynamicDrillTable.java | 10 +- .../planner/sql/handlers/AnalyzeTableHandler.java | 2 +- .../sql/handlers/RefreshMetadataHandler.java | 2 +- .../apache/drill/exec/server/DrillbitContext.java | 1 - .../drill/exec/store/avro/AvroFormatConfig.java | 22 +- .../drill/exec/store/avro/AvroFormatPlugin.java | 2 +- .../drill/exec/store/dfs/FileSystemPlugin.java | 14 +- .../apache/drill/exec/store/dfs/FormatCreator.java | 10 +- .../store/dfs/FormatPluginOptionExtractor.java | 10 +- .../store/dfs/FormatPluginOptionsDescriptor.java | 255 ++++++++++++++++----- .../exec/store/dfs/NamedFormatPluginConfig.java | 39 ++-- .../exec/store/dfs/WorkspaceSchemaFactory.java | 153 ++++++++----- .../exec/store/easy/json/JSONFormatPlugin.java | 56 ++--- .../sequencefile/SequenceFileFormatConfig.java | 11 +- .../sequencefile/SequenceFileFormatPlugin.java | 2 +- 
.../exec/store/easy/text/TextFormatPlugin.java | 68 ++++-- .../easy/text/reader/TextParsingSettings.java | 3 +- .../exec/store/httpd/HttpdLogFormatConfig.java | 56 +++-- .../drill/exec/store/image/ImageFormatConfig.java | 27 ++- .../drill/exec/store/log/LogFormatConfig.java | 71 +++--- .../exec/store/parquet/ParquetFormatConfig.java | 17 +- .../drill/exec/store/pcap/PcapBatchReader.java | 2 +- .../drill/exec/store/pcap/PcapFormatConfig.java | 21 +- .../drill/exec/util/StoragePluginTestUtils.java | 31 +-- .../main/resources/bootstrap-storage-plugins.json | 20 +- .../apache/drill/TestSchemaWithTableFunction.java | 3 +- .../org/apache/drill/TestSelectWithOption.java | 13 +- .../exec/physical/impl/writer/TestTextWriter.java | 70 +++--- .../drill/exec/store/TestPluginRegistry.java | 12 +- .../store/easy/text/compliant/BaseCsvTest.java | 14 +- .../drill/exec/store/httpd/TestHTTPDLogReader.java | 4 +- .../apache/drill/exec/store/log/TestLogReader.java | 172 +++++++------- .../store/parquet/TestParquetReaderConfig.java | 3 +- .../drill/exec/store/pcap/TestPcapEVFReader.java | 2 +- .../drill/exec/store/pcap/TestSessionizePCAP.java | 4 +- .../java/org/apache/drill/common/JSONOptions.java | 6 +- 47 files changed, 933 insertions(+), 588 deletions(-) diff --git a/contrib/format-esri/src/main/java/org/apache/drill/exec/store/esri/ShpFormatConfig.java b/contrib/format-esri/src/main/java/org/apache/drill/exec/store/esri/ShpFormatConfig.java index 8f9135b..fd59eb3 100644 --- a/contrib/format-esri/src/main/java/org/apache/drill/exec/store/esri/ShpFormatConfig.java +++ b/contrib/format-esri/src/main/java/org/apache/drill/exec/store/esri/ShpFormatConfig.java @@ -18,12 +18,15 @@ package org.apache.drill.exec.store.esri; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; + +import org.apache.drill.common.PlanStringBuilder; 
import org.apache.drill.common.logical.FormatPluginConfig; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; -import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Objects; @@ -34,7 +37,14 @@ import java.util.Objects; @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class ShpFormatConfig implements FormatPluginConfig { - public List<String> extensions = Collections.singletonList("shp"); + private final List<String> extensions; + + @JsonCreator + public ShpFormatConfig( + @JsonProperty("extensions") List<String> extensions) { + this.extensions = extensions == null ? + ImmutableList.of("shp") : ImmutableList.copyOf(extensions); + } @JsonInclude(JsonInclude.Include.NON_DEFAULT) public List<String> getExtensions() { @@ -42,14 +52,12 @@ public class ShpFormatConfig implements FormatPluginConfig { } public ShpBatchReader.ShpReaderConfig getReaderConfig(ShpFormatPlugin plugin) { - ShpBatchReader.ShpReaderConfig readerConfig = new ShpBatchReader.ShpReaderConfig(plugin); - - return readerConfig; + return new ShpBatchReader.ShpReaderConfig(plugin); } @Override public int hashCode() { - return Arrays.hashCode(new Object[]{extensions}); + return Objects.hash(extensions); } @Override @@ -60,7 +68,14 @@ public class ShpFormatConfig implements FormatPluginConfig { if (obj == null || getClass() != obj.getClass()) { return false; } - ShpFormatConfig other = (ShpFormatConfig)obj; + ShpFormatConfig other = (ShpFormatConfig) obj; return Objects.equals(extensions, other.getExtensions()); } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("extensions", extensions) + .toString(); + } } diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java index b347269..533b4b5 100644 --- 
a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java +++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java @@ -18,14 +18,16 @@ package org.apache.drill.exec.store.excel; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; import org.apache.drill.exec.store.excel.ExcelBatchReader.ExcelReaderConfig; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -35,27 +37,42 @@ import java.util.Objects; public class ExcelFormatConfig implements FormatPluginConfig { // This is the theoretical maximum number of rows in an Excel spreadsheet - private final int MAX_ROWS = 1048576; - - // TODO: Bad things happen if fields change after created. - // That is, if this config is stored in the plugin registry, then - // later modified. - // Change all these to be private final, and add constructor. - // See DRILL-7612. 
- - public List<String> extensions = Collections.singletonList("xlsx"); - - public int headerRow; - - public int lastRow = MAX_ROWS; - - public int firstColumn; - - public int lastColumn; - - public boolean allTextMode; + private final int MAX_ROWS = 1_048_576; + + private final List<String> extensions; + private final int headerRow; + private final int lastRow; + private final int firstColumn; + private final int lastColumn; + private final boolean allTextMode; + private final String sheetName; + + // Omitted properties take reasonable defaults + @JsonCreator + public ExcelFormatConfig( + @JsonProperty("extensions") List<String> extensions, + @JsonProperty("headerRow") Integer headerRow, + @JsonProperty("lastRow") Integer lastRow, + @JsonProperty("firstColumn") Integer firstColumn, + @JsonProperty("lastColumn") Integer lastColumn, + @JsonProperty("allTextMode") Boolean allTextMode, + @JsonProperty("sheetName") String sheetName) { + this.extensions = extensions == null + ? Collections.singletonList("xlsx") + : ImmutableList.copyOf(extensions); + this.headerRow = headerRow == null ? 0 : headerRow; + this.lastRow = lastRow == null ? MAX_ROWS : + Math.min(MAX_ROWS, lastRow); + this.firstColumn = firstColumn == null ? 0 : firstColumn; + this.lastColumn = lastColumn == null ? 0 : lastColumn; + this.allTextMode = allTextMode == null ? false : allTextMode; + this.sheetName = sheetName == null ? 
"" : sheetName; + } - public String sheetName = ""; + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public List<String> getExtensions() { + return extensions; + } public int getHeaderRow() { return headerRow; @@ -65,10 +82,6 @@ public class ExcelFormatConfig implements FormatPluginConfig { return lastRow; } - public String getSheetName() { - return sheetName; - } - public int getFirstColumn() { return firstColumn; } @@ -81,20 +94,19 @@ public class ExcelFormatConfig implements FormatPluginConfig { return allTextMode; } + public String getSheetName() { + return sheetName; + } + public ExcelReaderConfig getReaderConfig(ExcelFormatPlugin plugin) { ExcelReaderConfig readerConfig = new ExcelReaderConfig(plugin); return readerConfig; } - @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public List<String> getExtensions() { - return extensions; - } - @Override public int hashCode() { - return Arrays.hashCode( - new Object[]{extensions, headerRow, lastRow, firstColumn, lastColumn, allTextMode, sheetName}); + return Objects.hash(extensions, headerRow, lastRow, + firstColumn, lastColumn, allTextMode, sheetName); } @Override diff --git a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java index 87d013d..3ce2bd9 100644 --- a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java +++ b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java @@ -18,11 +18,15 @@ package org.apache.drill.exec.store.hdf5; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; + +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; +import 
org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -30,9 +34,18 @@ import java.util.Objects; @JsonTypeName(HDF5FormatPlugin.DEFAULT_NAME) public class HDF5FormatConfig implements FormatPluginConfig { - public List<String> extensions = Collections.singletonList("h5"); + private final List<String> extensions; + private final String defaultPath; - public String defaultPath; + @JsonCreator + public HDF5FormatConfig( + @JsonProperty("extensions") List<String> extensions, + @JsonProperty("defaultPath") String defaultPath) { + this.extensions = extensions == null + ? Collections.singletonList("h5") + : ImmutableList.copyOf(extensions); + this.defaultPath = defaultPath; + } @JsonInclude(JsonInclude.Include.NON_DEFAULT) public List<String> getExtensions() { @@ -53,11 +66,19 @@ public class HDF5FormatConfig implements FormatPluginConfig { } HDF5FormatConfig other = (HDF5FormatConfig) obj; return Objects.equals(extensions, other.getExtensions()) && - Objects.equals(defaultPath, other.defaultPath); + Objects.equals(defaultPath, other.defaultPath); } @Override public int hashCode() { - return Arrays.hashCode(new Object[]{extensions, defaultPath}); + return Objects.hash(extensions, defaultPath); + } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("extensions", extensions) + .field("default path", defaultPath) + .toString(); } } diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java index 7284409..a1d5c20 100644 --- a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java +++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java @@ -42,7 +42,7 @@ public class LTSVFormatPlugin extends 
EasyFormatPlugin<LTSVFormatPluginConfig> { private static final String DEFAULT_NAME = "ltsv"; public LTSVFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig storageConfig) { - this(name, context, fsConf, storageConfig, new LTSVFormatPluginConfig()); + this(name, context, fsConf, storageConfig, new LTSVFormatPluginConfig(null)); } public LTSVFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig config, LTSVFormatPluginConfig formatPluginConfig) { diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java index 1e96b74..11b0554 100644 --- a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java +++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java @@ -17,12 +17,15 @@ */ package org.apache.drill.exec.store.ltsv; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; + +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; -import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -30,21 +33,23 @@ import java.util.Objects; public class LTSVFormatPluginConfig implements FormatPluginConfig { private static final List<String> DEFAULT_EXTS = ImmutableList.of("ltsv"); - public List<String> extensions; + private final List<String> extensions; + + @JsonCreator + public LTSVFormatPluginConfig( + @JsonProperty("extensions") List<String> extensions) { + this.extensions = extensions == null ? 
+ DEFAULT_EXTS : ImmutableList.copyOf(extensions); + } @JsonInclude(JsonInclude.Include.NON_DEFAULT) public List<String> getExtensions() { - if (extensions == null) { - // when loading an old JSONFormatConfig that doesn't contain an "extensions" attribute - return DEFAULT_EXTS; - } return extensions; } @Override public int hashCode() { - List<String> array = extensions != null ? extensions : DEFAULT_EXTS; - return Arrays.hashCode(array.toArray(new String[array.size()])); + return Objects.hash(extensions); } @Override @@ -57,4 +62,11 @@ public class LTSVFormatPluginConfig implements FormatPluginConfig { LTSVFormatPluginConfig that = (LTSVFormatPluginConfig) obj; return Objects.equals(extensions, that.extensions); } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("extensions", extensions) + .toString(); + } } diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/TableFormatPluginConfig.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/TableFormatPluginConfig.java index f30d2b6..ffd3d92 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/TableFormatPluginConfig.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/TableFormatPluginConfig.java @@ -25,9 +25,7 @@ public abstract class TableFormatPluginConfig implements FormatPluginConfig { public boolean equals(Object obj) { if (this == obj) { return true; - } else if (obj == null) { - return false; - } else if (getClass() != obj.getClass()) { + } else if (obj == null || getClass() != obj.getClass()) { return false; } return impEquals(obj); diff --git a/contrib/format-syslog/src/main/java/org/apache/drill/exec/store/syslog/SyslogFormatConfig.java b/contrib/format-syslog/src/main/java/org/apache/drill/exec/store/syslog/SyslogFormatConfig.java index 184031a..e851e31 100644 --- a/contrib/format-syslog/src/main/java/org/apache/drill/exec/store/syslog/SyslogFormatConfig.java 
+++ b/contrib/format-syslog/src/main/java/org/apache/drill/exec/store/syslog/SyslogFormatConfig.java @@ -19,21 +19,35 @@ package org.apache.drill.exec.store.syslog; import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; -import org.apache.drill.shaded.guava.com.google.common.base.Objects; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; -import java.util.Arrays; import java.util.List; -import java.util.ArrayList; +import java.util.Objects; @JsonTypeName("syslog") @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class SyslogFormatConfig implements FormatPluginConfig { - public List<String> extensions; - public int maxErrors = 10; - public boolean flattenStructuredData; + private final List<String> extensions; + private final int maxErrors; + private final boolean flattenStructuredData; + + @JsonCreator + public SyslogFormatConfig( + @JsonProperty("extensions") List<String> extensions, + @JsonProperty("maxErrors") Integer maxErrors, + @JsonProperty("flattenStructuredData") Boolean flattenStructuredData) { + this.extensions = extensions == null ? + ImmutableList.of() : ImmutableList.copyOf(extensions); + this.maxErrors = maxErrors == null ? 10 : maxErrors; + this.flattenStructuredData = flattenStructuredData == null ? 
false : flattenStructuredData; + } public boolean getFlattenStructuredData() { return flattenStructuredData; @@ -47,25 +61,6 @@ public class SyslogFormatConfig implements FormatPluginConfig { return extensions; } - public void setExtensions(List<String> ext) { - this.extensions = ext; - } - - public void setExtension(String ext) { - if (this.extensions == null) { - this.extensions = new ArrayList<String>(); - } - this.extensions.add(ext); - } - - public void setMaxErrors(int errors) { - this.maxErrors = errors; - } - - public void setFlattenStructuredData(boolean flattenData) { - this.flattenStructuredData = flattenData; - } - @Override public boolean equals(Object obj) { if (this == obj) { @@ -75,13 +70,22 @@ public class SyslogFormatConfig implements FormatPluginConfig { return false; } SyslogFormatConfig other = (SyslogFormatConfig) obj; - return Objects.equal(extensions, other.extensions) && - Objects.equal(maxErrors, other.maxErrors) && - Objects.equal(flattenStructuredData, other.flattenStructuredData); + return Objects.equals(extensions, other.extensions) && + Objects.equals(maxErrors, other.maxErrors) && + Objects.equals(flattenStructuredData, other.flattenStructuredData); } @Override public int hashCode() { - return Arrays.hashCode(new Object[]{maxErrors, flattenStructuredData, extensions}); + return Objects.hash(maxErrors, flattenStructuredData, extensions); + } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("extensions", extensions) + .field("max errors", maxErrors) + .field("flatten structured data", flattenStructuredData) + .toString(); } } diff --git a/contrib/format-syslog/src/test/java/org/apache/drill/exec/store/syslog/TestSyslogFormat.java b/contrib/format-syslog/src/test/java/org/apache/drill/exec/store/syslog/TestSyslogFormat.java index b195d51..c7bd833 100644 --- a/contrib/format-syslog/src/test/java/org/apache/drill/exec/store/syslog/TestSyslogFormat.java +++ 
b/contrib/format-syslog/src/test/java/org/apache/drill/exec/store/syslog/TestSyslogFormat.java @@ -17,6 +17,7 @@ */ package org.apache.drill.exec.store.syslog; +import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -49,14 +50,11 @@ public class TestSyslogFormat extends ClusterTest { private static void defineSyslogPlugin() throws ExecutionSetupException { Map<String, FormatPluginConfig> formats = new HashMap<>(); - SyslogFormatConfig sampleConfig = new SyslogFormatConfig(); - sampleConfig.setExtension("syslog"); - formats.put("sample", sampleConfig); - - SyslogFormatConfig flattenedDataConfig = new SyslogFormatConfig(); - flattenedDataConfig.setExtension("syslog1"); - flattenedDataConfig.setFlattenStructuredData(true); - formats.put("flat", flattenedDataConfig); + formats.put("sample", new SyslogFormatConfig( + Collections.singletonList("syslog"), null, null)); + + formats.put("flat", new SyslogFormatConfig( + Collections.singletonList("syslog1"), null, true)); // Define a temporary plugin for the "cp" storage plugin. 
cluster.defineFormats("cp", formats); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java index 761d4ac..24edd24 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillFile.java @@ -17,11 +17,12 @@ */ package org.apache.drill.exec.dotdrill; -import org.apache.drill.common.config.LogicalPlanPersistence; import org.apache.drill.exec.store.dfs.DrillFileSystem; import org.apache.drill.exec.util.ImpersonationUtil; import org.apache.hadoop.fs.FileStatus; +import com.fasterxml.jackson.databind.ObjectMapper; + import org.apache.drill.shaded.guava.com.google.common.base.Preconditions; import java.io.IOException; @@ -29,13 +30,13 @@ import java.io.InputStream; public class DotDrillFile { - private FileStatus status; - private DotDrillType type; - private DrillFileSystem fs; + private final FileStatus status; + private final DotDrillType type; + private final DrillFileSystem fs; public static DotDrillFile create(DrillFileSystem fs, FileStatus status){ for(DotDrillType d : DotDrillType.values()){ - if(!status.isDir() && d.matches(status)){ + if(!status.isDirectory() && d.matches(status)){ return new DotDrillFile(fs, status, d); } } @@ -75,10 +76,10 @@ public class DotDrillFile { return fileName.substring(0, fileName.lastIndexOf(type.getEnding())); } - public View getView(LogicalPlanPersistence lpPersistence) throws IOException { + public View getView(ObjectMapper mapper) throws IOException { Preconditions.checkArgument(type == DotDrillType.VIEW); try(InputStream is = fs.open(status.getPath())){ - return lpPersistence.getMapper().readValue(is, View.class); + return mapper.readValue(is, View.class); } } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java index 64a0b68..14a69fc 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java @@ -188,7 +188,7 @@ public class ConvertCountToDirectScanRule extends RelOptRule { FormatPluginConfig formatConfig = formatSelection.getFormat(); if (!((formatConfig instanceof ParquetFormatConfig) || ((formatConfig instanceof NamedFormatPluginConfig) - && ((NamedFormatPluginConfig) formatConfig).name.equals("parquet")))) { + && ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet")))) { return new ImmutablePair<>(false, null); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillTable.java index fd04805..104fb6a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillTable.java @@ -18,6 +18,7 @@ package org.apache.drill.exec.planner.logical; import java.io.IOException; +import java.util.Objects; import org.apache.calcite.adapter.enumerable.EnumerableTableScan; import org.apache.calcite.config.CalciteConnectionConfig; @@ -192,13 +193,7 @@ public abstract class DrillTable implements Table { @Override public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((selection == null) ? 0 : selection.hashCode()); - result = prime * result + ((storageEngineConfig == null) ? 0 : storageEngineConfig.hashCode()); - result = prime * result + ((storageEngineName == null) ? 0 : storageEngineName.hashCode()); - result = prime * result + ((userName == null) ? 
0 : userName.hashCode()); - return result; + return Objects.hash(selection, storageEngineConfig, storageEngineName, userName); } @Override @@ -206,42 +201,13 @@ public abstract class DrillTable implements Table { if (this == obj) { return true; } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { + if (obj == null || getClass() != obj.getClass()) { return false; } DrillTable other = (DrillTable) obj; - if (selection == null) { - if (other.selection != null) { - return false; - } - } else if (!selection.equals(other.selection)) { - return false; - } - if (storageEngineConfig == null) { - if (other.storageEngineConfig != null) { - return false; - } - } else if (!storageEngineConfig.equals(other.storageEngineConfig)) { - return false; - } - if (storageEngineName == null) { - if (other.storageEngineName != null) { - return false; - } - } else if (!storageEngineName.equals(other.storageEngineName)) { - return false; - } - if (userName == null) { - if (other.userName != null) { - return false; - } - } else if (!userName.equals(other.userName)) { - return false; - } - return true; + return Objects.equals(selection, other.selection) && + Objects.equals(storageEngineConfig, other.storageEngineConfig) && + Objects.equals(storageEngineName, other.storageEngineName) && + Objects.equals(userName, other.userName); } - } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DynamicDrillTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DynamicDrillTable.java index 8059a30..d7720a3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DynamicDrillTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DynamicDrillTable.java @@ -26,9 +26,8 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; public class DynamicDrillTable extends DrillTable{ - static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(DynamicDrillTable.class); - private RelDataTypeHolder holder = new RelDataTypeHolder(); + private final RelDataTypeHolder holder = new RelDataTypeHolder(); public DynamicDrillTable(StoragePlugin plugin, String storageEngineName, String userName, Object selection) { super(storageEngineName, plugin, userName, selection); @@ -39,9 +38,10 @@ public class DynamicDrillTable extends DrillTable{ } /** - * TODO: Same purpose as other constructor except the impersonation user is the user who is running the Drillbit - * process. Once we add impersonation to non-FileSystem storage plugins such as Hive, HBase etc, - * we can remove this constructor. + * TODO: Same purpose as other constructor except the impersonation user is + * the user who is running the Drillbit process. Once we add impersonation to + * non-FileSystem storage plugins such as Hive, HBase etc, we can remove this + * constructor. */ public DynamicDrillTable(StoragePlugin plugin, String storageEngineName, Object selection) { super(storageEngineName, plugin, selection); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/AnalyzeTableHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/AnalyzeTableHandler.java index 4c5af1e..ee08d07 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/AnalyzeTableHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/AnalyzeTableHandler.java @@ -114,7 +114,7 @@ public class AnalyzeTableHandler extends DefaultSqlHandler { FormatPluginConfig formatConfig = formatSelection.getFormat(); if (!((formatConfig instanceof ParquetFormatConfig) || ((formatConfig instanceof NamedFormatPluginConfig) - && ((NamedFormatPluginConfig) formatConfig).name.equals("parquet")))) { + && ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet")))) { return DrillStatsTable.notSupported(context, tableName); } diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java index a2607a3..00dedc1 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/RefreshMetadataHandler.java @@ -113,7 +113,7 @@ public class RefreshMetadataHandler extends DefaultSqlHandler { FormatPluginConfig formatConfig = formatSelection.getFormat(); if (!((formatConfig instanceof ParquetFormatConfig) || ((formatConfig instanceof NamedFormatPluginConfig) && - ((NamedFormatPluginConfig) formatConfig).name.equals("parquet")))) { + ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet")))) { return notSupported(tableName); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/DrillbitContext.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/DrillbitContext.java index 0afd984..37519c7 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/DrillbitContext.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/DrillbitContext.java @@ -56,7 +56,6 @@ import java.util.concurrent.ExecutorService; import static org.apache.drill.shaded.guava.com.google.common.base.Preconditions.checkNotNull; public class DrillbitContext implements AutoCloseable { -// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillbitContext.class); private final BootStrapContext context; private final PhysicalPlanReader reader; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatConfig.java index 6e8a9a3..8ba85e2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatConfig.java +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatConfig.java @@ -17,10 +17,14 @@ */ package org.apache.drill.exec.store.avro; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; import java.util.Collections; import java.util.List; @@ -33,7 +37,16 @@ import java.util.Objects; @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class AvroFormatConfig implements FormatPluginConfig { - public List<String> extensions = Collections.singletonList("avro"); + private final List<String> extensions; + + @JsonCreator + public AvroFormatConfig(@JsonProperty("extensions") List<String> extensions) { + this.extensions = extensions == null + ? 
Collections.singletonList("avro") + : ImmutableList.copyOf(extensions); + } + + public List<String> getExtensions() { return extensions; } @Override public int hashCode() { @@ -51,4 +64,11 @@ public class AvroFormatConfig implements FormatPluginConfig { AvroFormatConfig that = (AvroFormatConfig) o; return Objects.equals(extensions, that.extensions); } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("extensions", extensions) + .toString(); + } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatPlugin.java index 7e7ad56..5d958ed 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroFormatPlugin.java @@ -51,7 +51,7 @@ public class AvroFormatPlugin extends EasyFormatPlugin<AvroFormatConfig> { config.blockSplittable = true; config.compressible = false; config.supportsProjectPushdown = true; - config.extensions = formatConfig.extensions; + config.extensions = formatConfig.getExtensions(); config.fsConf = fsConf; config.defaultName = DEFAULT_NAME; config.readerOperatorType = CoreOperatorType.AVRO_SUB_SCAN_VALUE; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java index 88ebe4d..6ed02f4 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSystemPlugin.java @@ -29,7 +29,6 @@ import java.util.Set; import org.apache.calcite.schema.SchemaPlus; import org.apache.drill.common.JSONOptions; -import org.apache.drill.common.config.LogicalPlanPersistence; import org.apache.drill.common.exceptions.ExecutionSetupException; import 
org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.logical.FormatPluginConfig; @@ -77,12 +76,10 @@ public class FileSystemPlugin extends AbstractStoragePlugin { private final Map<FormatPluginConfig, FormatPlugin> formatPluginsByConfig; private final FileSystemConfig config; private final Configuration fsConf; - private final LogicalPlanPersistence lpPersistance; public FileSystemPlugin(FileSystemConfig config, DrillbitContext context, String name) throws ExecutionSetupException { super(context, name); this.config = config; - this.lpPersistance = context.getLpPersistence(); try { fsConf = new Configuration(); @@ -113,7 +110,7 @@ public class FileSystemPlugin extends AbstractStoragePlugin { for (Map.Entry<String, WorkspaceConfig> space : config.getWorkspaces().entrySet()) { factories.add(new WorkspaceSchemaFactory( this, space.getKey(), name, space.getValue(), matchers, - context.getLpPersistence(), context.getClasspathScan())); + context.getLpPersistence().getMapper(), context.getClasspathScan())); } } @@ -121,7 +118,7 @@ public class FileSystemPlugin extends AbstractStoragePlugin { if (noWorkspace || !config.getWorkspaces().containsKey(DEFAULT_WS_NAME)) { factories.add(new WorkspaceSchemaFactory(this, DEFAULT_WS_NAME, name, WorkspaceConfig.DEFAULT, matchers, - context.getLpPersistence(), context.getClasspathScan())); + context.getLpPersistence().getMapper(), context.getClasspathScan())); } this.schemaFactory = new FileSystemSchemaFactory(name, factories); @@ -186,7 +183,7 @@ public class FileSystemPlugin extends AbstractStoragePlugin { * @return a new FormatCreator instance */ protected FormatCreator newFormatCreator(FileSystemConfig config, DrillbitContext context, Configuration fsConf) { - return new FormatCreator(context, fsConf, config, context.getClasspathScan()); + return new FormatCreator(context, fsConf, config); } @Override @@ -223,7 +220,8 @@ public class FileSystemPlugin extends AbstractStoragePlugin { public AbstractGroupScan 
getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns, SessionOptionManager options, MetadataProviderManager metadataProviderManager) throws IOException { - FormatSelection formatSelection = selection.getWith(lpPersistance.getMapper(), FormatSelection.class); + FormatSelection formatSelection = selection.getWith( + context.getLpPersistence().getMapper(), FormatSelection.class); FormatPlugin plugin = getFormatPlugin(formatSelection.getFormat()); return plugin.getGroupScan(userName, formatSelection.getSelection(), columns, options, metadataProviderManager); @@ -250,7 +248,7 @@ public class FileSystemPlugin extends AbstractStoragePlugin { @Override public FormatPlugin getFormatPlugin(FormatPluginConfig config) { if (config instanceof NamedFormatPluginConfig) { - return formatCreator.getFormatPluginByName(((NamedFormatPluginConfig) config).name); + return formatCreator.getFormatPluginByName(((NamedFormatPluginConfig) config).getName()); } FormatPlugin plugin = formatPluginsByConfig.get(config); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatCreator.java index b981adf..d322de6 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatCreator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatCreator.java @@ -27,16 +27,17 @@ import java.util.Map; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.logical.FormatPluginConfig; import org.apache.drill.common.logical.StoragePluginConfig; -import org.apache.drill.common.scanner.persistence.ScanResult; import org.apache.drill.common.util.ConstructorChecker; import org.apache.drill.exec.server.DrillbitContext; import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Responsible for instantiating format plugins */ public class FormatCreator { - private 
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FormatCreator.class); + private static final Logger logger = LoggerFactory.getLogger(FormatCreator.class); private static final ConstructorChecker FORMAT_BASED = new ConstructorChecker(String.class, DrillbitContext.class, Configuration.class, StoragePluginConfig.class, FormatPluginConfig.class); @@ -82,12 +83,11 @@ public class FormatCreator { FormatCreator( DrillbitContext context, Configuration fsConf, - FileSystemConfig storageConfig, - ScanResult classpathScan) { + FileSystemConfig storageConfig) { this.context = context; this.fsConf = fsConf; this.storageConfig = storageConfig; - this.pluginClasses = classpathScan.getImplementations(FormatPlugin.class); + this.pluginClasses = context.getClasspathScan().getImplementations(FormatPlugin.class); this.configConstructors = initConfigConstructors(pluginClasses); Map<String, FormatPlugin> plugins = new HashMap<>(); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionExtractor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionExtractor.java index 8df96dd..edecc19 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionExtractor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionExtractor.java @@ -36,6 +36,9 @@ import org.apache.drill.exec.store.table.function.TableParamDef; import org.apache.drill.exec.store.table.function.TableSignature; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + import org.apache.drill.shaded.guava.com.google.common.annotations.VisibleForTesting; /** @@ -58,7 +61,7 @@ final class FormatPluginOptionExtractor { LogicalPlanPersistence.getSubTypes(scanResult, FormatPluginConfig.class); for (Class<? 
extends FormatPluginConfig> pluginConfigClass : pluginConfigClasses) { FormatPluginOptionsDescriptor optionsDescriptor = new FormatPluginOptionsDescriptor(pluginConfigClass); - result.put(optionsDescriptor.typeName.toLowerCase(), optionsDescriptor); + result.put(optionsDescriptor.getTypeName().toLowerCase(), optionsDescriptor); } this.optionsByTypeName = unmodifiableMap(result); } @@ -95,9 +98,10 @@ final class FormatPluginOptionExtractor { * the signature and parameters (it should be one of the signatures * returned by * {@link FormatPluginOptionExtractor#getTableSignatures(String, List)}) + * @param mapper * @return the config */ - FormatPluginConfig createConfigForTable(TableInstance t) { + FormatPluginConfig createConfigForTable(TableInstance t, ObjectMapper mapper, FormatPluginConfig baseConfig) { if (!t.sig.getSpecificParams().get(0).getName().equals("type")) { throw UserException.parseError() .message("unknown first param for %s", t.sig) @@ -120,6 +124,6 @@ final class FormatPluginOptionExtractor { .addContext("table", t.sig.getName()) .build(logger); } - return optionsDescriptor.createConfigForTable(t); + return optionsDescriptor.createConfigForTable(t, mapper, baseConfig); } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionsDescriptor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionsDescriptor.java index dbfcca4..6068aca 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionsDescriptor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPluginOptionsDescriptor.java @@ -19,6 +19,7 @@ package org.apache.drill.exec.store.dfs; import static java.util.Collections.unmodifiableMap; +import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.ArrayList; @@ -33,18 +34,23 @@ import org.apache.drill.exec.store.dfs.WorkspaceSchemaFactory.TableInstance; import 
org.apache.drill.exec.store.table.function.TableParamDef; import org.apache.drill.exec.store.table.function.TableSignature; import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; /** * Describes the options for a format plugin * extracted from the FormatPluginConfig subclass */ final class FormatPluginOptionsDescriptor { - private static final Logger logger = org.slf4j.LoggerFactory.getLogger(FormatPluginOptionsDescriptor.class); + private static final Logger logger = LoggerFactory.getLogger(FormatPluginOptionsDescriptor.class); - final Class<? extends FormatPluginConfig> pluginConfigClass; - final String typeName; + protected final Class<? extends FormatPluginConfig> pluginConfigClass; + protected final String typeName; private final Map<String, TableParamDef> functionParamsByName; /** @@ -79,6 +85,8 @@ final class FormatPluginOptionsDescriptor { this.functionParamsByName = unmodifiableMap(paramsByName); } + public String getTypeName() { return typeName; } + /** * Returns the table function signature for this format plugin config class. 
* @@ -86,7 +94,7 @@ final class FormatPluginOptionsDescriptor { * @param tableParameters common table parameters to be included * @return the signature */ - TableSignature getTableSignature(String tableName, List<TableParamDef> tableParameters) { + protected TableSignature getTableSignature(String tableName, List<TableParamDef> tableParameters) { return TableSignature.of(tableName, tableParameters, params()); } @@ -100,7 +108,7 @@ final class FormatPluginOptionsDescriptor { /** * @return a readable String of the parameters and their names */ - String presentParams() { + protected String presentParams() { StringBuilder sb = new StringBuilder("("); List<TableParamDef> params = params(); for (int i = 0; i < params.size(); i++) { @@ -113,63 +121,211 @@ final class FormatPluginOptionsDescriptor { sb.append(")"); return sb.toString(); } - /** * Creates an instance of the FormatPluginConfig based on the passed parameters. * * @param t the signature and the parameters passed to the table function + * @param mapper * @return the corresponding config */ - FormatPluginConfig createConfigForTable(TableInstance t) { - List<TableParamDef> formatParams = t.sig.getSpecificParams(); - // Exclude common params values, leave only format related params - List<Object> formatParamsValues = t.params.subList(0, t.params.size() - t.sig.getCommonParams().size()); - - // Per the constructor, the first param is always "type" - TableParamDef typeParamDef = formatParams.get(0); - Object typeParam = formatParamsValues.get(0); - if (!typeParamDef.getName().equals("type") - || typeParamDef.getType() != String.class - || !(typeParam instanceof String) - || !typeName.equalsIgnoreCase((String)typeParam)) { - // if we reach here, there's a bug as all signatures generated start with a type parameter - throw UserException.parseError() - .message( - "This function signature is not supported: %s\n" - + "expecting %s", - t.presentParams(), this.presentParams()) - .addContext("table", t.sig.getName()) - 
.build(logger); + FormatPluginConfig createConfigForTable(TableInstance t, ObjectMapper mapper, FormatPluginConfig baseConfig) { + ConfigCreator configCreator = new ConfigCreator(t, mapper, baseConfig); + return configCreator.createNewStyle(); + } + + @Override + public String toString() { + return "OptionsDescriptor [pluginConfigClass=" + pluginConfigClass + ", typeName=" + typeName + + ", functionParamsByName=" + functionParamsByName + "]"; + } + + /** + * Implements a table function to specify a format config. Provides two + * Implementations. The first is the "legacy" version (Drill 1.17 and + * before), which relies on a niladic constructor and mutable fields. + * Since mutable fields conflicts with the desire for configs to be + * immutable, the newer version (Drill 1.8 and later) use JSON serialization + * to create a JSON object with the desired properties and to seriarialize + * that object to a config. Since Jackson allows creating JSON objects + * from an existing config, this newer method merges the existing plugin + * properties with those specified in the table function. Essentially + * the table function "inherits" any existing config, "overriding" only + * those properties which are specified. Prior to Drill 1.18, a table + * function inherited the default properties, even if there was an + * existing plugin for the target file. See DRILL-6168. The original + * behavior is retained in case we find we need to add an option to + * cause Drill to revert to the old behavior. + */ + private class ConfigCreator { + final TableInstance t; + final FormatPluginConfig baseConfig; + final List<TableParamDef> formatParams; + final List<Object> formatParamsValues; + final ObjectMapper mapper; + + public ConfigCreator(TableInstance table, ObjectMapper mapper, FormatPluginConfig baseConfig) { + this.t = table; + this.mapper = mapper; + + // Abundance of caution: if the base is not of the correct + // type, just ignore it to avoid introducing new errors. 
+ // Drill prior to 1.18 didn't use a base config. + if (baseConfig == null || baseConfig.getClass() != pluginConfigClass) { + this.baseConfig = null; + } else { + this.baseConfig = baseConfig; + } + formatParams = t.sig.getSpecificParams(); + // Exclude common params values, leave only format related params + formatParamsValues = t.params.subList(0, t.params.size() - t.sig.getCommonParams().size()); } - FormatPluginConfig config; - try { - config = pluginConfigClass.newInstance(); - } catch (InstantiationException | IllegalAccessException e) { - throw UserException.parseError(e) + + public FormatPluginConfig createNewStyle() { + verifyType(); + ObjectNode configObject = makeConfigNode(); + applyParams(configObject); + // Do the following to visualize the merged object + // System.out.println(mapper.writeValueAsString(configObject)); + return nodeToConfig(configObject); + } + + /** + * Create a JSON node for the config: from the existing config + * if available, else an empty node. + */ + private ObjectNode makeConfigNode() { + if (baseConfig == null) { + ObjectNode configObject = mapper.createObjectNode(); + + // Type field is required to deserialize config + configObject.replace("type", + mapper.convertValue(typeName, JsonNode.class)); + return configObject; + } else { + return mapper.valueToTree(baseConfig); + } + } + + /** + * Replace any existing properties with the fields from the + * table function. 
+ */ + private void applyParams(ObjectNode configObject) { + for (int i = 1; i < formatParamsValues.size(); i++) { + applyParam(configObject, i); + } + } + + private void applyParam(ObjectNode configObject, int i) { + Object param = paramValue(i); + // when null is passed, we leave the default defined in the config instance + if (param != null) { + configObject.replace(formatParams.get(i).getName(), + mapper.convertValue(param, JsonNode.class)); + } + } + + private Object paramValue(int i) { + Object param = formatParamsValues.get(i); + if (param != null && param instanceof String) { + // normalize Java literals, ex: \t, \n, \r + param = StringEscapeUtils.unescapeJava((String) param); + } + return param; + } + + /** + * Convert the JSON node to a format config. + */ + private FormatPluginConfig nodeToConfig(ObjectNode configObject) { + try { + return mapper.readerFor(pluginConfigClass).readValue(configObject); + } catch (IOException e) { + String jsonConfig; + try { + jsonConfig = mapper.writeValueAsString(configObject); + } catch (JsonProcessingException e1) { + jsonConfig = "unavailable: " + e1.getMessage(); + } + throw UserException.parseError(e) .message( "configuration for format of type %s can not be created (class: %s)", - this.typeName, pluginConfigClass.getName()) + typeName, pluginConfigClass.getName()) .addContext("table", t.sig.getName()) + .addContext("JSON configuration", jsonConfig) .build(logger); + } } - for (int i = 1; i < formatParamsValues.size(); i++) { - Object param = formatParamsValues.get(i); + + /** + * Creates a format plugin config in the style prior to + * Drill 1.8: binds parameters to public, mutable fields. + * However, this causes issues: fields should be immutable (DRILL-7612, DRILL-6672). + * Also, this style does not allow retaining some fields + * while customizing others. (DRILL-6168). 
+ * @return + */ + @SuppressWarnings("unused") + public FormatPluginConfig createOldStyle() { + verifyType(); + FormatPluginConfig config = configInstance(); + bindParams(config); + return config; + } + + public void verifyType() { + + // Per the constructor, the first param is always "type" + TableParamDef typeParamDef = formatParams.get(0); + Object typeParam = formatParamsValues.get(0); + if (!typeParamDef.getName().equals("type") + || typeParamDef.getType() != String.class + || !(typeParam instanceof String) + || !typeName.equalsIgnoreCase((String)typeParam)) { + // if we reach here, there's a bug as all signatures generated start with a type parameter + throw UserException.parseError() + .message( + "This function signature is not supported: %s\n" + + "expecting %s", + t.presentParams(), presentParams()) + .addContext("table", t.sig.getName()) + .build(logger); + } + } + + public FormatPluginConfig configInstance() { + try { + return pluginConfigClass.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw UserException.parseError(e) + .message( + "configuration for format of type %s can not be created (class: %s)", + typeName, pluginConfigClass.getName()) + .addContext("table", t.sig.getName()) + .build(logger); + } + } + + private void bindParams(FormatPluginConfig config) { + for (int i = 1; i < formatParamsValues.size(); i++) { + bindParam(config, i); + } + } + + private void bindParam(FormatPluginConfig config, int i) { + Object param = paramValue(i); if (param == null) { // when null is passed, we leave the default defined in the config class - continue; - } - if (param instanceof String) { - // normalize Java literals, ex: \t, \n, \r - param = StringEscapeUtils.unescapeJava((String) param); + return; } TableParamDef paramDef = formatParams.get(i); - TableParamDef expectedParamDef = this.functionParamsByName.get(paramDef.getName()); + TableParamDef expectedParamDef = functionParamsByName.get(paramDef.getName()); if 
(expectedParamDef == null || expectedParamDef.getType() != paramDef.getType()) { throw UserException.parseError() .message( "The parameters provided are not applicable to the type specified:\n" + "provided: %s\nexpected: %s", - t.presentParams(), this.presentParams()) + t.presentParams(), presentParams()) .addContext("table", t.sig.getName()) .build(logger); } @@ -181,8 +337,8 @@ final class FormatPluginOptionsDescriptor { if (stringParam.length() != 1) { throw UserException.parseError() .message("Expected single character but was String: %s", stringParam) - .addContext("table", t.sig.getName()) - .addContext("parameter", paramDef.getName()) + .addContext("Table", t.sig.getName()) + .addContext("Parameter", paramDef.getName()) .build(logger); } param = stringParam.charAt(0); @@ -191,17 +347,10 @@ final class FormatPluginOptionsDescriptor { } catch (IllegalAccessException | NoSuchFieldException | SecurityException e) { throw UserException.parseError(e) .message("Can not set value %s to parameter %s: %s", param, paramDef.getName(), paramDef.getType()) - .addContext("table", t.sig.getName()) - .addContext("parameter", paramDef.getName()) + .addContext("Table", t.sig.getName()) + .addContext("Parameter", paramDef.getName()) .build(logger); } } - return config; - } - - @Override - public String toString() { - return "OptionsDescriptor [pluginConfigClass=" + pluginConfigClass + ", typeName=" + typeName - + ", functionParamsByName=" + functionParamsByName + "]"; } -} \ No newline at end of file +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/NamedFormatPluginConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/NamedFormatPluginConfig.java index 99a3728..50cdab6 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/NamedFormatPluginConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/NamedFormatPluginConfig.java @@ -17,21 +17,30 @@ */ package org.apache.drill.exec.store.dfs; 
+import java.util.Objects; + +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; @JsonTypeName("named") public class NamedFormatPluginConfig implements FormatPluginConfig { - public String name; + private final String name; + + @JsonCreator + public NamedFormatPluginConfig(@JsonProperty("name") String name) { + this.name = name; + } + + public String getName() { return name; } @Override public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((name == null) ? 0 : name.hashCode()); - return result; + return Objects.hash(name); } @Override @@ -39,21 +48,17 @@ public class NamedFormatPluginConfig implements FormatPluginConfig { if (this == obj) { return true; } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { + if (obj == null || getClass() != obj.getClass()) { return false; } NamedFormatPluginConfig other = (NamedFormatPluginConfig) obj; - if (name == null) { - if (other.name != null) { - return false; - } - } else if (!name.equals(other.name)) { - return false; - } - return true; + return Objects.equals(name, other.name); } + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("name", name) + .toString(); + } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java index 6353fb0..21deb6d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java @@ -40,7 +40,6 @@ import org.apache.calcite.schema.Function; import org.apache.calcite.schema.Table; import 
org.apache.commons.lang3.SystemUtils; import org.apache.commons.lang3.tuple.Pair; -import org.apache.drill.common.config.LogicalPlanPersistence; import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.exceptions.UserException; @@ -107,7 +106,6 @@ public class WorkspaceSchemaFactory { private final String schemaName; private final FileSystemPlugin plugin; private final ObjectMapper mapper; - private final LogicalPlanPersistence logicalPlanPersistence; private final Path wsPath; private final FormatPluginOptionExtractor optionExtractor; @@ -118,13 +116,12 @@ public class WorkspaceSchemaFactory { String storageEngineName, WorkspaceConfig config, List<FormatMatcher> formatMatchers, - LogicalPlanPersistence logicalPlanPersistence, + ObjectMapper mapper, ScanResult scanResult) throws ExecutionSetupException { - this.logicalPlanPersistence = logicalPlanPersistence; + this.mapper = mapper; this.fsConf = plugin.getFsConf(); this.plugin = plugin; this.config = config; - this.mapper = logicalPlanPersistence.getMapper(); this.fileMatchers = Lists.newArrayList(); this.dirMatchers = Lists.newArrayList(); this.storageEngineName = storageEngineName; @@ -397,7 +394,7 @@ public class WorkspaceSchemaFactory { private View getView(DotDrillFile f) throws IOException { assert f.getType() == DotDrillType.VIEW; - return f.getView(logicalPlanPersistence); + return f.getView(mapper); } @Override @@ -594,62 +591,18 @@ public class WorkspaceSchemaFactory { @Override public DrillTable create(TableInstance key) { try { - final FileSelection fileSelection = FileSelection.create(getFS(), config.getLocation(), key.sig.getName(), config.allowAccessOutsideWorkspace()); - if (fileSelection == null) { + FileSelectionInspector inspector = new FileSelectionInspector(key); + if (inspector.fileSelection == null) { return null; } - boolean hasDirectories = fileSelection.containsDirectories(getFS()); - 
- if (key.sig.getParams().size() > 0) { - FileSelection newSelection = detectEmptySelection(fileSelection, hasDirectories); - - if (newSelection.isEmptyDirectory()) { - return new DynamicDrillTable(plugin, storageEngineName, schemaConfig.getUserName(), fileSelection); - } - - FormatPluginConfig formatConfig = optionExtractor.createConfigForTable(key); - FormatSelection selection = new FormatSelection(formatConfig, newSelection); - DrillTable drillTable = new DynamicDrillTable(plugin, storageEngineName, schemaConfig.getUserName(), selection); - setMetadataProviderManager(drillTable, key.sig.getName()); - - List<TableParamDef> commonParams = key.sig.getCommonParams(); - if (commonParams.isEmpty()) { - return drillTable; - } - // extract only common parameters related values - List<Object> paramValues = key.params.subList(key.params.size() - commonParams.size(), key.params.size()); - return applyFunctionParameters(drillTable, commonParams, paramValues); - } - - if (hasDirectories) { - for (final FormatMatcher matcher : dirMatchers) { - try { - DrillTable table = matcher.isReadable(getFS(), fileSelection, plugin, storageEngineName, schemaConfig); - setMetadataProviderManager(table, key.sig.getName()); - if (table != null) { - return table; - } - } catch (IOException e) { - logger.debug("File read failed.", e); - } - } - } - - FileSelection newSelection = detectEmptySelection(fileSelection, hasDirectories); - if (newSelection.isEmptyDirectory()) { - return new DynamicDrillTable(plugin, storageEngineName, schemaConfig.getUserName(), fileSelection); - } + DrillTable table = inspector.matchFormat(); - for (final FormatMatcher matcher : fileMatchers) { - DrillTable table = matcher.isReadable(getFS(), newSelection, plugin, storageEngineName, schemaConfig); - setMetadataProviderManager(table, key.sig.getName()); - if (table != null) { - return table; - } + if (key.sig.getParams().size() == 0) { + return table; + } else { + return parseTableFunction(key, inspector, table); } - 
return null; - } catch (AccessControlException e) { if (!schemaConfig.getIgnoreAuthErrors()) { logger.debug(e.getMessage()); @@ -660,10 +613,34 @@ public class WorkspaceSchemaFactory { } catch (IOException e) { logger.debug("Failed to create DrillTable with root {} and name {}", config.getLocation(), key, e); } - return null; } + private DrillTable parseTableFunction(TableInstance key, + FileSelectionInspector inspector, DrillTable table) { + FileSelection newSelection = inspector.selection(); + + if (newSelection.isEmptyDirectory()) { + return new DynamicDrillTable(plugin, storageEngineName, schemaConfig.getUserName(), + inspector.fileSelection); + } + + FormatPluginConfig baseConfig = inspector.formatMatch == null + ? null : inspector.formatMatch.getFormatPlugin().getConfig(); + FormatPluginConfig formatConfig = optionExtractor.createConfigForTable(key, mapper, baseConfig); + FormatSelection selection = new FormatSelection(formatConfig, newSelection); + DrillTable drillTable = new DynamicDrillTable(plugin, storageEngineName, schemaConfig.getUserName(), selection); + setMetadataProviderManager(drillTable, key.sig.getName()); + + List<TableParamDef> commonParams = key.sig.getCommonParams(); + if (commonParams.isEmpty()) { + return drillTable; + } + // extract only common parameters related values + List<Object> paramValues = key.params.subList(key.params.size() - commonParams.size(), key.params.size()); + return applyFunctionParameters(drillTable, commonParams, paramValues); + } + /** * Expands given file selection if it has directories. * If expanded file selection is null (i.e. directory is empty), sets empty directory status to true. @@ -845,6 +822,64 @@ public class WorkspaceSchemaFactory { ).collect(Collectors.toList()); } - } + /** + * Compute and retain file selection and format match properties used + * by multiple functions above. 
+ */ + private class FileSelectionInspector { + private final TableInstance key; + private final DrillFileSystem fs; + public final FileSelection fileSelection; + public final boolean hasDirectories; + private FileSelection newSelection; + public FormatMatcher formatMatch; + + public FileSelectionInspector(TableInstance key) throws IOException { + this.key = key; + this.fs = getFS(); + this.fileSelection = FileSelection.create(fs, config.getLocation(), key.sig.getName(), config.allowAccessOutsideWorkspace()); + if (fileSelection == null) { + this.hasDirectories = false; + } else { + this.hasDirectories = fileSelection.containsDirectories(fs); + } + } + protected DrillTable matchFormat() throws IOException { + if (hasDirectories) { + for (final FormatMatcher matcher : dirMatchers) { + try { + DrillTable table = matcher.isReadable(getFS(), fileSelection, plugin, storageEngineName, schemaConfig); + if (table != null) { + formatMatch = matcher; + setMetadataProviderManager(table, key.sig.getName()); + return table; + } + } catch (IOException e) { + logger.debug("File read failed.", e); + } + } + } + + newSelection = detectEmptySelection(fileSelection, hasDirectories); + if (newSelection.isEmptyDirectory()) { + return new DynamicDrillTable(plugin, storageEngineName, schemaConfig.getUserName(), fileSelection); + } + + for (final FormatMatcher matcher : fileMatchers) { + DrillTable table = matcher.isReadable(getFS(), newSelection, plugin, storageEngineName, schemaConfig); + if (table != null) { + formatMatch = matcher; + setMetadataProviderManager(table, key.sig.getName()); + return table; + } + } + return null; + } + + public FileSelection selection() { + return newSelection != null ? 
newSelection : fileSelection; + } + } + } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java index c812999..7204686 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java @@ -21,7 +21,9 @@ import java.io.IOException; import java.io.OutputStream; import java.util.List; import java.util.Map; +import java.util.Objects; +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.logical.FormatPluginConfig; import org.apache.drill.common.logical.StoragePluginConfig; @@ -50,7 +52,9 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; @@ -62,12 +66,15 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> { private static final boolean IS_COMPRESSIBLE = true; - public JSONFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig storageConfig) { - this(name, context, fsConf, storageConfig, new JSONFormatConfig()); + public JSONFormatPlugin(String name, DrillbitContext context, + Configuration fsConf, StoragePluginConfig storageConfig) { + this(name, context, fsConf, storageConfig, new JSONFormatConfig(null)); } - public JSONFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig config, JSONFormatConfig formatPluginConfig) { - super(name, context, fsConf, config, 
formatPluginConfig, true, false, false, IS_COMPRESSIBLE, formatPluginConfig.getExtensions(), DEFAULT_NAME); + public JSONFormatPlugin(String name, DrillbitContext context, + Configuration fsConf, StoragePluginConfig config, JSONFormatConfig formatPluginConfig) { + super(name, context, fsConf, config, formatPluginConfig, true, + false, false, IS_COMPRESSIBLE, formatPluginConfig.getExtensions(), DEFAULT_NAME); } @Override @@ -166,25 +173,25 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> { @JsonTypeName("json") public static class JSONFormatConfig implements FormatPluginConfig { - - public List<String> extensions = ImmutableList.of("json"); private static final List<String> DEFAULT_EXTS = ImmutableList.of("json"); + private final List<String> extensions; + + @JsonCreator + public JSONFormatConfig( + @JsonProperty("extensions") List<String> extensions) { + this.extensions = extensions == null ? + DEFAULT_EXTS : ImmutableList.copyOf(extensions); + } + @JsonInclude(JsonInclude.Include.NON_DEFAULT) public List<String> getExtensions() { - if (extensions == null) { - // when loading an old JSONFormatConfig that doesn't contain an "extensions" attribute - return DEFAULT_EXTS; - } return extensions; } @Override public int hashCode() { - int prime = 31; - int result = 1; - result = prime * result + ((extensions == null) ? 
0 : extensions.hashCode()); - return result; + return Objects.hash(extensions); } @Override @@ -192,21 +199,18 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> { if (this == obj) { return true; } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { + if (obj == null || getClass() != obj.getClass()) { return false; } JSONFormatConfig other = (JSONFormatConfig) obj; - if (extensions == null) { - if (other.extensions != null) { - return false; - } - } else if (!extensions.equals(other.extensions)) { - return false; - } - return true; + return Objects.deepEquals(extensions, other.extensions); + } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("extensions", extensions) + .toString(); } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatConfig.java index e9f84ad..0572ca7 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatConfig.java @@ -17,7 +17,9 @@ */ package org.apache.drill.exec.store.easy.sequencefile; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; import org.apache.drill.common.PlanStringBuilder; @@ -29,7 +31,14 @@ import java.util.Objects; @JsonTypeName("sequencefile") @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class SequenceFileFormatConfig implements FormatPluginConfig { - public List<String> extensions = ImmutableList.of(); + private final List<String> extensions; + + @JsonCreator + public 
SequenceFileFormatConfig( + @JsonProperty("extensions") List<String> extensions) { + this.extensions = extensions == null ? + ImmutableList.of() : ImmutableList.copyOf(extensions); + } public List<String> getExtensions() { return extensions; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java index 707bdeb..9e55448 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java @@ -44,7 +44,7 @@ import org.apache.hadoop.mapred.FileSplit; public class SequenceFileFormatPlugin extends EasyFormatPlugin<SequenceFileFormatConfig> { public SequenceFileFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig storageConfig) { - this(name, context, fsConf, storageConfig, new SequenceFileFormatConfig()); + this(name, context, fsConf, storageConfig, new SequenceFileFormatConfig(null)); } public SequenceFileFormatPlugin(String name, DrillbitContext context, Configuration fsConf, diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatPlugin.java index c0523e8..af710e3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatPlugin.java @@ -17,7 +17,8 @@ */ package org.apache.drill.exec.store.easy.text; -import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonAlias; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import 
com.fasterxml.jackson.annotation.JsonInclude.Include; import com.fasterxml.jackson.annotation.JsonProperty; @@ -59,10 +60,11 @@ import org.apache.drill.exec.store.easy.text.reader.CompliantTextBatchReader; import org.apache.drill.exec.store.easy.text.reader.TextParsingSettings; import org.apache.drill.exec.store.easy.text.writer.TextRecordWriter; import org.apache.drill.exec.store.schedule.CompleteFileWork; +import org.apache.drill.shaded.guava.com.google.common.base.Strings; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; import org.apache.hadoop.conf.Configuration; import java.io.IOException; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -111,38 +113,54 @@ public class TextFormatPlugin extends EasyFormatPlugin<TextFormatPlugin.TextForm @JsonInclude(Include.NON_DEFAULT) public static class TextFormatConfig implements FormatPluginConfig { - // TODO: Bad things happen if field change after created. - // Change all these to be private final, and add constructor. - // See DRILL-7612 - - public List<String> extensions = Collections.emptyList(); - public String lineDelimiter = "\n"; - public char fieldDelimiter = '\n'; - public char quote = '"'; - public char escape = '"'; - public char comment = '#'; - public boolean skipFirstLine; - public boolean extractHeader; + public final List<String> extensions; + public final String lineDelimiter; + public final char fieldDelimiter; + public final char quote; + public final char escape; + public final char comment; + public final boolean skipFirstLine; + public final boolean extractHeader; + + @JsonCreator + public TextFormatConfig( + @JsonProperty("extensions") List<String> extensions, + @JsonProperty("lineDelimiter") String lineDelimiter, + // Drill 1.17 and before used "delimiter" in the + // bootstrap storage plugins file, assume many instances + // exist in the field. 
+ @JsonAlias("delimiter") + @JsonProperty("fieldDelimiter") String fieldDelimiter, + @JsonProperty("quote") String quote, + @JsonProperty("escape") String escape, + @JsonProperty("comment") String comment, + @JsonProperty("skipFirstLine") Boolean skipFirstLine, + @JsonProperty("extractHeader") Boolean extractHeader) { + this.extensions = extensions == null ? + ImmutableList.of() : ImmutableList.copyOf(extensions); + this.lineDelimiter = lineDelimiter == null ? "\n" : lineDelimiter; + this.fieldDelimiter = Strings.isNullOrEmpty(fieldDelimiter) ? ',' : fieldDelimiter.charAt(0); + this.quote = Strings.isNullOrEmpty(quote) ? '"' : quote.charAt(0); + this.escape = Strings.isNullOrEmpty(escape) ? '"' : escape.charAt(0); + this.comment = Strings.isNullOrEmpty(comment) ? '#' : comment.charAt(0); + this.skipFirstLine = skipFirstLine == null ? false : skipFirstLine; + this.extractHeader = extractHeader == null ? false : extractHeader; + } - public TextFormatConfig() { } + public TextFormatConfig() { + this(null, null, null, null, null, null, null, null); + } public List<String> getExtensions() { return extensions; } + public String getLineDelimiter() { return lineDelimiter; } + public char getFieldDelimiter() { return fieldDelimiter; } public char getQuote() { return quote; } public char getEscape() { return escape; } public char getComment() { return comment; } - public String getLineDelimiter() { return lineDelimiter; } - public char getFieldDelimiter() { return fieldDelimiter; } public boolean isSkipFirstLine() { return skipFirstLine; } - - @JsonIgnore + @JsonProperty("extractHeader") public boolean isHeaderExtractionEnabled() { return extractHeader; } - @Deprecated - @JsonProperty("delimiter") - public void setFieldDelimiter(char delimiter){ - this.fieldDelimiter = delimiter; - } - @Override public int hashCode() { return Objects.hash(extensions, lineDelimiter, fieldDelimiter, diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextParsingSettings.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextParsingSettings.java index 8fbcd9b..2028d24 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextParsingSettings.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextParsingSettings.java @@ -32,7 +32,7 @@ public class TextParsingSettings { private final long maxCharsPerColumn = TextFormatPlugin.MAX_CHARS_PER_COLUMN; private final byte normalizedNewLine = b('\n'); private final byte[] newLineDelimiter; - private final String lineSeparatorString = "\n"; + private final String lineSeparatorString; private boolean skipFirstLine; private final boolean headerExtractionEnabled; @@ -97,6 +97,7 @@ public class TextParsingSettings { this.quote = quoteChar; this.quoteEscape = quoteEscapeChar; this.newLineDelimiter = newlineDelim; + this.lineSeparatorString = new String(newLineDelimiter); this.delimiter = delimChar; this.comment = commentChar; this.ignoreLeadingWhitespace = ignoreLeadingWhitespace; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatConfig.java index c4c34b6..0aa7ece 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatConfig.java @@ -17,28 +17,43 @@ */ package org.apache.drill.exec.store.httpd; +import java.util.Objects; + +import org.apache.drill.common.PlanStringBuilder; +import org.apache.drill.common.logical.FormatPluginConfig; + +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; -import 
org.apache.drill.common.logical.FormatPluginConfig; @JsonTypeName("httpd") @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class HttpdLogFormatConfig implements FormatPluginConfig { - public String logFormat; - public String timestampFormat = "dd/MMM/yyyy:HH:mm:ss ZZ"; + public static final String DEFAULT_TS_FORMAT = "dd/MMM/yyyy:HH:mm:ss ZZ"; + + // No extensions? + private final String logFormat; + private final String timestampFormat; + + @JsonCreator + public HttpdLogFormatConfig( + @JsonProperty("logFormat") String logFormat, + @JsonProperty("timestampFormat") String timestampFormat) { + this.logFormat = logFormat; + this.timestampFormat = timestampFormat == null + ? DEFAULT_TS_FORMAT : timestampFormat; + } /** - * @return the log formatting string. This string is the config string from httpd.conf or similar config file. + * @return the log formatting string. This string is the config string from + * httpd.conf or similar config file. */ public String getLogFormat() { return logFormat; } - public void setLogFormat(String format) { - this.logFormat = format; - } - /** * @return the timestampFormat */ @@ -46,19 +61,9 @@ public class HttpdLogFormatConfig implements FormatPluginConfig { return timestampFormat; } - /** - * Sets the time stamp format - * @param timestamp - */ - public void setTimestampFormat(String timestamp) { - this.timestampFormat = timestamp; - } - @Override public int hashCode() { - int result = logFormat != null ? logFormat.hashCode() : 0; - result = 31 * result + (timestampFormat != null ? timestampFormat.hashCode() : 0); - return result; + return Objects.hash(logFormat, timestampFormat); } @Override @@ -71,10 +76,15 @@ public class HttpdLogFormatConfig implements FormatPluginConfig { } HttpdLogFormatConfig that = (HttpdLogFormatConfig) o; + return Objects.equals(logFormat, that.logFormat) && + Objects.equals(timestampFormat, that.timestampFormat); + } - if (logFormat != null ? 
!logFormat.equals(that.logFormat) : that.logFormat != null) { - return false; - } - return timestampFormat != null ? timestampFormat.equals(that.timestampFormat) : that.timestampFormat == null; + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("log format", logFormat) + .field("timestamp format", timestampFormat) + .toString(); } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/image/ImageFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/image/ImageFormatConfig.java index aa12580..a41a0d5 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/image/ImageFormatConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/image/ImageFormatConfig.java @@ -24,7 +24,9 @@ import java.util.Objects; import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude.Include; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; @@ -32,10 +34,27 @@ import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; @JsonTypeName("image") @JsonInclude(Include.NON_DEFAULT) public class ImageFormatConfig implements FormatPluginConfig { - public List<String> extensions = ImmutableList.of(); - public boolean fileSystemMetadata = true; - public boolean descriptive = true; - public String timeZone = null; + private final List<String> extensions; + private final boolean fileSystemMetadata; + private final boolean descriptive; + private final String timeZone; + + public ImageFormatConfig() { + this(null, null, null, null); + } + + @JsonCreator + public ImageFormatConfig( + @JsonProperty("extensions") List<String> extensions, + 
@JsonProperty("fileSystemMetadata") Boolean fileSystemMetadata, + @JsonProperty("descriptive") Boolean descriptive, + @JsonProperty("timeZone") String timeZone) { + this.extensions = extensions == null ? + ImmutableList.of() : ImmutableList.copyOf(extensions); + this.fileSystemMetadata = fileSystemMetadata == null ? true : fileSystemMetadata; + this.descriptive = descriptive == null ? true : descriptive; + this.timeZone = timeZone; + } public List<String> getExtensions() { return extensions; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java index 09c2a19..02bd29a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java @@ -18,27 +18,38 @@ package org.apache.drill.exec.store.log; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; +import org.apache.drill.common.PlanStringBuilder; import org.apache.drill.common.logical.FormatPluginConfig; import org.apache.drill.shaded.guava.com.google.common.base.Objects; +import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; @JsonTypeName(LogFormatPlugin.PLUGIN_NAME) public class LogFormatConfig implements FormatPluginConfig { - // Fields must be public for table functions to work: DRILL-6672 - - public String regex; - public String extension; - public int maxErrors = 10; - public List<LogFormatField> schema; - - // Required to keep Jackson happy - public LogFormatConfig() { } + private final String regex; + private final String extension; + private final int maxErrors; + private final List<LogFormatField> schema; + + 
@JsonCreator + public LogFormatConfig( + @JsonProperty("regex") String regex, + @JsonProperty("extension") String extension, + @JsonProperty("maxErrors") Integer maxErrors, + @JsonProperty("schema") List<LogFormatField> schema) { + this.regex = regex; + this.extension = extension; + this.maxErrors = maxErrors == null ? 10 : maxErrors; + this.schema = schema == null + ? ImmutableList.of() : schema; + } public String getRegex() { return regex; @@ -56,26 +67,6 @@ public class LogFormatConfig implements FormatPluginConfig { return schema; } - public void setExtension(String ext) { - extension = ext; - } - - public void setMaxErrors(int errors) { - maxErrors = errors; - } - - public void setRegex(String regex) { - this.regex = regex; - } - - public void setSchema(List<LogFormatField> schema) { - this.schema = schema; - } - - public void initSchema() { - schema = new ArrayList<LogFormatField>(); - } - @Override public boolean equals(Object obj) { if (this == obj) { @@ -86,19 +77,19 @@ public class LogFormatConfig implements FormatPluginConfig { } LogFormatConfig other = (LogFormatConfig) obj; return Objects.equal(regex, other.regex) && - Objects.equal(maxErrors, other.maxErrors) && - Objects.equal(schema, other.schema) && - Objects.equal(extension, other.extension); + Objects.equal(maxErrors, other.maxErrors) && + Objects.equal(schema, other.schema) && + Objects.equal(extension, other.extension); } @Override public int hashCode() { - return Arrays.hashCode(new Object[]{regex, maxErrors, schema, extension}); + return Objects.hashCode(regex, maxErrors, schema, extension); } @JsonIgnore public boolean hasSchema() { - return schema != null && ! schema.isEmpty(); + return schema != null && ! schema.isEmpty(); } @JsonIgnore @@ -133,4 +124,14 @@ public class LogFormatConfig implements FormatPluginConfig { LogFormatField field = getField(fieldIndex); return field == null ? 
null : field.getFormat(); } + + @Override + public String toString() { + return new PlanStringBuilder(this) + .field("regex", regex) + .field("extension", extension) + .field("schema", schema) + .field("maxErrors", maxErrors) + .toString(); + } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatConfig.java index c18d7bc..4370b92 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatConfig.java @@ -17,8 +17,10 @@ */ package org.apache.drill.exec.store.parquet; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; @@ -30,8 +32,19 @@ import com.fasterxml.jackson.annotation.JsonTypeName; @JsonTypeName("parquet") @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class ParquetFormatConfig implements FormatPluginConfig { - public boolean autoCorrectCorruptDates = true; - public boolean enableStringsSignedMinMax; + private final boolean autoCorrectCorruptDates; + private final boolean enableStringsSignedMinMax; + + public ParquetFormatConfig() { + this(true, false); + } + + @JsonCreator + public ParquetFormatConfig(@JsonProperty("autoCorrectCorruptDates") Boolean autoCorrectCorruptDates, + @JsonProperty("enableStringsSignedMinMax") boolean enableStringsSignedMinMax) { + this.autoCorrectCorruptDates = autoCorrectCorruptDates == null ? 
true : autoCorrectCorruptDates; + this.enableStringsSignedMinMax = enableStringsSignedMinMax; + } /** * @return true if auto correction of corrupt dates is enabled, false otherwise diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapBatchReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapBatchReader.java index b28880c..e724d18 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapBatchReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapBatchReader.java @@ -160,7 +160,7 @@ public class PcapBatchReader implements ManagedReader<FileSchemaNegotiator> { public PcapReaderConfig(PcapFormatPlugin plugin) { this.plugin = plugin; this.config = plugin.getConfig(); - this.sessionizeTCPStreams = config.sessionizeTCPStreams; + this.sessionizeTCPStreams = config.getSessionizeTCPStreams(); } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapFormatConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapFormatConfig.java index adf6379..1312151 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapFormatConfig.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/pcap/PcapFormatConfig.java @@ -17,7 +17,9 @@ */ package org.apache.drill.exec.store.pcap; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.drill.common.PlanStringBuilder; @@ -29,17 +31,30 @@ import java.util.Objects; @JsonTypeName(PcapFormatPlugin.PLUGIN_NAME) public class PcapFormatConfig implements FormatPluginConfig { + private static final List<String> DEFAULT_EXTNS = ImmutableList.of(PcapFormatPlugin.PLUGIN_NAME); - public List<String> extensions = ImmutableList.of(PcapFormatPlugin.PLUGIN_NAME); + private final List<String> extensions; 
+ private final boolean sessionizeTCPStreams; - @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public boolean sessionizeTCPStreams; + @JsonCreator + public PcapFormatConfig( + @JsonProperty("extensions") List<String> extensions, + @JsonProperty("sessionizeTCPStreams") Boolean sessionizeTCPStreams) { + this.extensions = extensions == null ? + DEFAULT_EXTNS : ImmutableList.copyOf(extensions); + this.sessionizeTCPStreams = sessionizeTCPStreams == null ? false : sessionizeTCPStreams; + } @JsonInclude(JsonInclude.Include.NON_DEFAULT) public List<String> getExtensions() { return extensions; } + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public boolean getSessionizeTCPStreams() { + return sessionizeTCPStreams; + } + @Override public int hashCode() { return Objects.hash(extensions, sessionizeTCPStreams); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/util/StoragePluginTestUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/util/StoragePluginTestUtils.java index ebbd87d..ace8ed0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/util/StoragePluginTestUtils.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/util/StoragePluginTestUtils.java @@ -31,7 +31,7 @@ import org.apache.drill.exec.store.dfs.FileSystemConfig; import org.apache.drill.exec.store.dfs.WorkspaceConfig; import org.apache.drill.exec.store.easy.sequencefile.SequenceFileFormatConfig; -import org.apache.drill.exec.store.easy.text.TextFormatPlugin; +import org.apache.drill.exec.store.easy.text.TextFormatPlugin.TextFormatConfig; /** * Utility methods to speed up tests. 
Some of the production code currently @@ -103,31 +103,20 @@ public class StoragePluginTestUtils { Optional.ofNullable(fileSystemConfig.getFormats()) .ifPresent(newFormats::putAll); - TextFormatPlugin.TextFormatConfig textConfig = new TextFormatPlugin.TextFormatConfig(); - textConfig.extensions = ImmutableList.of("txt"); - textConfig.fieldDelimiter = '\u0000'; - newFormats.put("txt", textConfig); + newFormats.put("txt", new TextFormatConfig( + ImmutableList.of("txt"), null, "\u0000", null, null, null, null, null)); - TextFormatPlugin.TextFormatConfig ssvConfig = new TextFormatPlugin.TextFormatConfig(); - ssvConfig.extensions = ImmutableList.of("ssv"); - ssvConfig.fieldDelimiter = ' '; - newFormats.put("ssv", ssvConfig); + newFormats.put("ssv", new TextFormatConfig( + ImmutableList.of("ssv"), null, " ", null, null, null, null, null)); - TextFormatPlugin.TextFormatConfig psvConfig = new TextFormatPlugin.TextFormatConfig(); - psvConfig.extensions = ImmutableList.of("tbl"); - psvConfig.fieldDelimiter = '|'; - newFormats.put("psv", psvConfig); + newFormats.put("psv", new TextFormatConfig( + ImmutableList.of("tbl"), null, "|", null, null, null, null, null)); - SequenceFileFormatConfig seqConfig = new SequenceFileFormatConfig(); - seqConfig.extensions = ImmutableList.of("seq"); + SequenceFileFormatConfig seqConfig = new SequenceFileFormatConfig(ImmutableList.of("seq")); newFormats.put("sequencefile", seqConfig); - TextFormatPlugin.TextFormatConfig csvhtestConfig = new TextFormatPlugin.TextFormatConfig(); - csvhtestConfig.extensions = ImmutableList.of("csvh-test"); - csvhtestConfig.fieldDelimiter = ','; - csvhtestConfig.extractHeader = true; - csvhtestConfig.skipFirstLine = true; - newFormats.put("csvh-test", csvhtestConfig); + newFormats.put("csvh-test", new TextFormatConfig( + ImmutableList.of("csvh-test"), null, ",", null, null, null, true, true)); FileSystemConfig newFileSystemConfig = new FileSystemConfig( fileSystemConfig.getConnection(), diff --git 
a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json index a8df53e..4aa1754 100644 --- a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json +++ b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json @@ -19,17 +19,17 @@ "psv" : { "type" : "text", "extensions" : [ "tbl" ], - "delimiter" : "|" + "fieldDelimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], - "delimiter" : "," + "fieldDelimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], - "delimiter" : "\t" + "fieldDelimiter" : "\t" }, "httpd" : { "type" : "httpd", @@ -91,17 +91,17 @@ "psv" : { "type" : "text", "extensions" : [ "tbl" ], - "delimiter" : "|" + "fieldDelimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], - "delimiter" : "," + "fieldDelimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], - "delimiter" : "\t" + "fieldDelimiter" : "\t" }, "parquet" : { "type" : "parquet" @@ -126,7 +126,7 @@ "csvh" : { "type" : "text", "extensions" : [ "csvh" ], - "delimiter" : ",", + "fieldDelimiter" : ",", "extractHeader" : true } }, @@ -139,12 +139,12 @@ "csv" : { "type" : "text", "extensions" : [ "csv" ], - "delimiter" : "," + "fieldDelimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], - "delimiter" : "\t" + "fieldDelimiter" : "\t" }, "json" : { "type" : "json", @@ -165,7 +165,7 @@ "csvh" : { "type" : "text", "extensions" : [ "csvh" ], - "delimiter" : ",", + "fieldDelimiter" : ",", "extractHeader" : true }, "image" : { diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestSchemaWithTableFunction.java b/exec/java-exec/src/test/java/org/apache/drill/TestSchemaWithTableFunction.java index f478c03..5b448a4 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestSchemaWithTableFunction.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestSchemaWithTableFunction.java @@ -142,7 +142,8 @@ public class 
TestSchemaWithTableFunction extends ClusterTest { client.alterSession(ExecConstants.OUTPUT_FORMAT_OPTION, "csv"); run("create table %s as select columns[0] as id, columns[1] as name from %s", table, sourceTable); - String query = "select * from table(%s(type=>'text', fieldDelimiter=>',', extractHeader=>true " + + // Inherits other properties from CSV + String query = "select * from table(%s(type=>'text', extractHeader=>true " + ",schema=>'inline=(`id` int)')) where id = 1"; testBuilder() diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithOption.java b/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithOption.java index 294a375..69f689e 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithOption.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithOption.java @@ -139,8 +139,7 @@ public class TestSelectWithOption extends ClusterTest { String tableName = genCSVTable("testTextLineDelimiterWithCarriageReturn", "1, a\r", "2, b\r"); - String lineDelimiter = new String(new char[]{92, 114, 92, 110}); // represents \r\n - testWithResult(format("select columns from table(%s(type=>'TeXT', lineDelimiter => '%s'))", tableName, lineDelimiter), + testWithResult(format("select columns from table(%s(type=>'TeXT', fieldDelimiter => '*', lineDelimiter => '\\r\\n'))", tableName), listOf("1, a"), listOf("2, b")); } @@ -221,18 +220,20 @@ public class TestSelectWithOption extends ClusterTest { String csvTableName = genCSVTable("testVariationsCSV", "a,b", "c|d"); + // The default field delimiter is ',', change it to something else. 
// Using the defaults in TextFormatConfig (the field delimiter is neither "," nor "|") - testWithResult(format("select columns from table(%s (type => 'TeXT'))", csvTableName), + testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => '*'))", csvTableName), listOf("a,b"), listOf("c|d")); // the drill config file binds .csv to "," delimited testWithResult(format("select columns from %s", csvTableName), listOf("a", "b"), listOf("c|d")); - // setting the delimiter - testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => ','))", csvTableName), + // Default delimiter for csv + testWithResult(format("select columns from table(%s (type => 'TeXT'))", csvTableName), listOf("a", "b"), listOf("c|d")); + // Setting the delimiter testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => '|'))", csvTableName), listOf("a,b"), listOf("c", "d")); @@ -250,7 +251,7 @@ public class TestSelectWithOption extends ClusterTest { // CSV would require: // "{""columns"": [""f"",""g""]}" // A bug in older versions appeared to have the perverse - // effect of allowing the above to kinds-sorta work. + // effect of allowing the above to kinda-sorta work. String[] jsonQueries = { format("select columns from table(%s(type => 'JSON'))", jsonTableName), // we can use named format plugin configurations too! 
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestTextWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestTextWriter.java index 6ad38bc..58bdb73 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestTextWriter.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestTextWriter.java @@ -56,32 +56,38 @@ public class TestTextWriter extends ClusterTest { Map<String, FormatPluginConfig> formats = new HashMap<>(); - TextFormatConfig csv = new TextFormatConfig(); - csv.extensions = Collections.singletonList("csv"); - csv.lineDelimiter = "\n"; - csv.fieldDelimiter = ','; - csv.quote = '"'; - csv.escape = '"'; - csv.extractHeader = true; - formats.put("csv", csv); - - TextFormatConfig tsv = new TextFormatConfig(); - tsv.extensions = Collections.singletonList("tsv"); - tsv.lineDelimiter = "\n"; - tsv.fieldDelimiter = '\t'; - tsv.quote = '"'; - tsv.escape = '"'; - tsv.extractHeader = true; - formats.put("tsv", tsv); - - TextFormatConfig custom = new TextFormatConfig(); - custom.extensions = Collections.singletonList("custom"); - custom.lineDelimiter = "!"; - custom.fieldDelimiter = '_'; - custom.quote = '$'; - custom.escape = '^'; - custom.extractHeader = true; - formats.put("custom", custom); + formats.put("csv", new TextFormatConfig( + Collections.singletonList("csv"), + "\n", // line delimiter + ",", // field delimiter + "\"", // quote + "\"", // escape + null, // comment + false, // skip first line + true // extract header + )); + + formats.put("tsv", new TextFormatConfig( + Collections.singletonList("tsv"), + "\n", // line delimiter + "\t", // field delimiter + "\"", // quote + "\"", // escape + null, // comment + false, // skip first line + true // extract header + )); + + formats.put("custom", new TextFormatConfig( + Collections.singletonList("custom"), + "!", // line delimiter + "_", // field delimiter + "$", // quote + "^", // 
escape + null, // comment + false, // skip first line + true // extract header + )); cluster.defineFormats("dfs", formats); } @@ -245,8 +251,16 @@ public class TestTextWriter extends ClusterTest { @Test public void testLineDelimiterLengthLimit() throws Exception { - TextFormatConfig incorrect = new TextFormatConfig(); - incorrect.lineDelimiter = "end"; + TextFormatConfig incorrect = new TextFormatConfig( + null, + "end", // line delimiter + null, // field delimiter + null, // quote + null, // escape + null, // comment + false, // skip first line + false // extract header + ); cluster.defineFormat("dfs", "incorrect", incorrect); client.alterSession(ExecConstants.OUTPUT_FORMAT_OPTION, "incorrect"); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestPluginRegistry.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestPluginRegistry.java index d3c42e1..bfff3f9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestPluginRegistry.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestPluginRegistry.java @@ -574,8 +574,16 @@ public class TestPluginRegistry extends BaseTest { assertFalse(fsConfig.getFormats().containsKey("bsv")); // Add a new format - TextFormatConfig bsv = new TextFormatConfig(); - bsv.fieldDelimiter = '!'; + TextFormatConfig bsv = new TextFormatConfig( + null, + null, // line delimiter + "!", // field delimiter + null, // quote + null, // escape + null, // comment + false, // skip first line + false // extract header + ); registry.putFormatPlugin(CP_PLUGIN_NAME, "bsv", bsv); config = registry.getStoredConfig(CP_PLUGIN_NAME); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java index a34279d..ff45968 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java +++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java @@ -70,10 +70,16 @@ public class BaseCsvTest extends ClusterTest { .maxParallelization(maxParallelization)); // Set up CSV storage plugin using headers. - TextFormatConfig csvFormat = new TextFormatConfig(); - csvFormat.fieldDelimiter = ','; - csvFormat.skipFirstLine = skipFirstLine; - csvFormat.extractHeader = extractHeader; + TextFormatConfig csvFormat = new TextFormatConfig( + null, + null, // line delimiter + null, // field delimiter + null, // quote + null, // escape + null, // comment + skipFirstLine, + extractHeader + ); testDir = cluster.makeDataDir("data", "csv", csvFormat); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java index d6eb06b..c86ee52 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java @@ -45,8 +45,8 @@ public class TestHTTPDLogReader extends ClusterTest { ClusterTest.startCluster(ClusterFixture.builder(dirTestWatcher)); // Define a temporary format plugin for the "cp" storage plugin. 
- HttpdLogFormatConfig sampleConfig = new HttpdLogFormatConfig(); - sampleConfig.setLogFormat("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""); + HttpdLogFormatConfig sampleConfig = new HttpdLogFormatConfig( + "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"", null); cluster.defineFormat("cp", "sample", sampleConfig); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java index 0e47cf0..8b4d8a3 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.net.URL; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.commons.io.FileUtils; @@ -32,6 +33,7 @@ import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.shaded.guava.com.google.common.collect.Lists; import org.apache.drill.test.BaseDirTestWatcher; import org.apache.drill.test.ClusterFixture; import org.apache.drill.test.ClusterTest; @@ -79,96 +81,102 @@ public class TestLogReader extends ClusterTest { // file ignores such files, so they'll never get committed. Instead, // make up a fake suffix. 
Map<String, FormatPluginConfig> formats = new HashMap<>(); - LogFormatConfig sampleConfig = new LogFormatConfig(); - sampleConfig.setExtension("log1"); - sampleConfig.setRegex(DATE_ONLY_PATTERN); - - sampleConfig.initSchema(); - sampleConfig.getSchema().add(new LogFormatField("year", "INT")); - sampleConfig.getSchema().add(new LogFormatField("month", "INT")); - sampleConfig.getSchema().add(new LogFormatField("day", "INT")); - formats.put("sample", sampleConfig); - - // Full Drill log parser definition. - LogFormatConfig logConfig = new LogFormatConfig(); - logConfig.setExtension("log1"); - logConfig.setRegex("(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d) " + - "(\\d\\d):(\\d\\d):(\\d\\d),\\d+ " + - "\\[([^]]*)] (\\w+)\\s+(\\S+) - (.*)"); - - logConfig.initSchema(); - logConfig.getSchema().add(new LogFormatField("year", "INT")); - logConfig.getSchema().add(new LogFormatField("month", "INT")); - logConfig.getSchema().add(new LogFormatField("day", "INT")); - logConfig.getSchema().add(new LogFormatField("hour", "INT")); - logConfig.getSchema().add(new LogFormatField("minute", "INT")); - logConfig.getSchema().add(new LogFormatField("second", "INT")); - logConfig.getSchema().add(new LogFormatField("thread")); - logConfig.getSchema().add(new LogFormatField("level")); - logConfig.getSchema().add(new LogFormatField("module")); - logConfig.getSchema().add(new LogFormatField("message")); - formats.put("drill-log", logConfig); - - //Set up additional configs to check the time/date formats - LogFormatConfig logDateConfig = new LogFormatConfig(); - logDateConfig.setExtension("log2"); - logDateConfig.setRegex("(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}),(\\d+)\\s\\[(\\w+)\\]\\s([A-Z]+)\\s(.+)"); - - logDateConfig.initSchema(); - logDateConfig.getSchema().add(new LogFormatField("entry_date", "TIMESTAMP", "yy-MM-dd hh:mm:ss")); - logDateConfig.getSchema().add(new LogFormatField("pid", "INT")); - logDateConfig.getSchema().add(new LogFormatField("location")); - logDateConfig.getSchema().add(new 
LogFormatField("message_type")); - logDateConfig.getSchema().add(new LogFormatField("message")); - - logDateConfig.setMaxErrors(3); - formats.put("date-log",logDateConfig); - - LogFormatConfig mysqlLogConfig = new LogFormatConfig(); - mysqlLogConfig.setExtension("sqllog"); - mysqlLogConfig.setRegex("(\\d{6})\\s(\\d{2}:\\d{2}:\\d{2})\\s+(\\d+)\\s(\\w+)\\s+(.+)"); - formats.put("mysql-log", mysqlLogConfig); - - // Firewall log file that requires date parsing - LogFormatConfig firewallConfig = new LogFormatConfig(); - firewallConfig.setRegex("(\\w{3}\\s\\d{1,2}\\s\\d{4}\\s\\d{2}:\\d{2}:\\d{2})\\s+(\\w+)" + - "\\[(\\d+)\\]:\\s(.*?(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}).*?)"); - firewallConfig.setExtension("ssdlog"); - firewallConfig.initSchema(); - firewallConfig.getSchema().add(new LogFormatField("eventDate", "TIMESTAMP", "MMM dd yyyy HH:mm:ss")); - firewallConfig.getSchema().add(new LogFormatField("process_name")); - firewallConfig.getSchema().add(new LogFormatField("pid", "INT")); - firewallConfig.getSchema().add(new LogFormatField("message")); - firewallConfig.getSchema().add(new LogFormatField("src_ip")); - formats.put("ssdlog", firewallConfig); + + formats.put("sample", dateOnlyConfig()); + formats.put("drill-log", drillLogConfig()); + formats.put("date-log", dateTimeConfig()); + formats.put("mysql-log", mySqlConfig()); + formats.put("ssdlog", firewallConfig()); // Define a temporary format plugin for the "cp" storage plugin. cluster.defineFormats("cp", formats); - // Config similar to the above, but with no type info. Types - // will be provided via the provided schema mechanism. Column names - // are required so that the format and provided schemas match up. 
- LogFormatConfig untypedConfig = new LogFormatConfig(); - - untypedConfig.setExtension("logu"); - untypedConfig.setRegex(DATE_ONLY_PATTERN); - - untypedConfig.initSchema(); - untypedConfig.getSchema().add(new LogFormatField("year")); - untypedConfig.getSchema().add(new LogFormatField("month")); - untypedConfig.getSchema().add(new LogFormatField("day")); - // Create a test directory we can write to. - schemaAndConfigDir = cluster.makeDataDir("sAndC", "logu", untypedConfig); + schemaAndConfigDir = cluster.makeDataDir("sAndC", "logu", untypedDateOnlyConfig()); // Empty configuration: regex and columns defined in the // provided schema - LogFormatConfig emptyConfig = new LogFormatConfig(); - emptyConfig.setExtension("loge"); + LogFormatConfig emptyConfig = new LogFormatConfig( + null, "loge", null, null); schemaOnlyDir = cluster.makeDataDir("SOnly", "loge", emptyConfig); tableFuncDir = cluster.makeDataDir("tf", "logf", emptyConfig); } + private static LogFormatConfig dateOnlyConfig() { + List<LogFormatField> schema = Lists.newArrayList( + new LogFormatField("year", "INT"), + new LogFormatField("month", "INT"), + new LogFormatField("day", "INT")); + return new LogFormatConfig( + DATE_ONLY_PATTERN, "log1", null, schema); + } + + // Config similar to the above, but with no type info. Types + // will be provided via the provided schema mechanism. Column names + // are required so that the format and provided schemas match up. + private static LogFormatConfig untypedDateOnlyConfig() { + List<LogFormatField> schema = Lists.newArrayList( + new LogFormatField("year"), + new LogFormatField("month"), + new LogFormatField("day")); + return new LogFormatConfig( + DATE_ONLY_PATTERN, "logu", null, schema); + } + + // Full Drill log parser definition. 
+ private static LogFormatConfig drillLogConfig() { + String regex = "(\\d\\d\\d\\d)-(\\d\\d)-(\\d\\d) " + + "(\\d\\d):(\\d\\d):(\\d\\d),\\d+ " + + "\\[([^]]*)] (\\w+)\\s+(\\S+) - (.*)"; + List<LogFormatField> schema = Lists.newArrayList( + new LogFormatField("year", "INT"), + new LogFormatField("month", "INT"), + new LogFormatField("day", "INT"), + new LogFormatField("hour", "INT"), + new LogFormatField("minute", "INT"), + new LogFormatField("second", "INT"), + new LogFormatField("thread"), + new LogFormatField("level"), + new LogFormatField("module"), + new LogFormatField("message")); + return new LogFormatConfig( + regex, "log1", null, schema); + } + + //Set up additional configs to check the time/date formats + private static LogFormatConfig dateTimeConfig() { + String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}),(\\d+)\\s\\[(\\w+)\\]\\s([A-Z]+)\\s(.+)"; + List<LogFormatField> schema = Lists.newArrayList( + new LogFormatField("entry_date", "TIMESTAMP", "yy-MM-dd hh:mm:ss"), + new LogFormatField("pid", "INT"), + new LogFormatField("location"), + new LogFormatField("message_type"), + new LogFormatField("message")); + return new LogFormatConfig( + regex, "log2", 3, schema); + } + + private static LogFormatConfig mySqlConfig() { + String regex = "(\\d{6})\\s(\\d{2}:\\d{2}:\\d{2})\\s+(\\d+)\\s(\\w+)\\s+(.+)"; + return new LogFormatConfig( + regex, "sqllog", null, null); + } + + // Firewall log file that requires date parsing + private static LogFormatConfig firewallConfig() { + String regex = + "(\\w{3}\\s\\d{1,2}\\s\\d{4}\\s\\d{2}:\\d{2}:\\d{2})\\s+(\\w+)" + + "\\[(\\d+)\\]:\\s(.*?(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}).*?)"; + List<LogFormatField> schema = Lists.newArrayList( + new LogFormatField("eventDate", "TIMESTAMP", "MMM dd yyyy HH:mm:ss"), + new LogFormatField("process_name"), + new LogFormatField("pid", "INT"), + new LogFormatField("message"), + new LogFormatField("src_ip")); + return new LogFormatConfig( + regex, "ssdlog", null, schema); + } + 
@Test public void testWildcard() throws RpcException { String sql = "SELECT * FROM cp.`regex/simple.log1`"; @@ -730,14 +738,8 @@ public class TestLogReader extends ClusterTest { ObjectMapper mapper = new ObjectMapper(); assertTrue(mapper.canSerialize(LogFormatPlugin.class)); - LogFormatConfig sampleConfig = new LogFormatConfig(); - sampleConfig.setExtension("log1"); - sampleConfig.setRegex(DATE_ONLY_PATTERN); + LogFormatConfig sampleConfig = dateOnlyConfig(); - sampleConfig.initSchema(); - sampleConfig.getSchema().add(new LogFormatField("year", "INT")); - sampleConfig.getSchema().add(new LogFormatField("month", "INT")); - sampleConfig.getSchema().add(new LogFormatField("day", "INT")); String json = mapper.writeValueAsString(sampleConfig); LogFormatConfig result = mapper.readValue(json, LogFormatConfig.class); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetReaderConfig.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetReaderConfig.java index 69f9666..6b6341a 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetReaderConfig.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetReaderConfig.java @@ -96,6 +96,7 @@ public class TestParquetReaderConfig extends BaseTest { @Test public void testPriorityAssignmentForStringsSignedMinMax() throws Exception { + @SuppressWarnings("resource") SystemOptionManager options = new SystemOptionManager(DrillConfig.create()).init(); // use value from format config @@ -104,7 +105,7 @@ public class TestParquetReaderConfig extends BaseTest { assertEquals(formatConfig.isStringsSignedMinMaxEnabled(), readerConfig.enableStringsSignedMinMax()); // change format config value - formatConfig.enableStringsSignedMinMax = true; + formatConfig = new ParquetFormatConfig(true, true); readerConfig = ParquetReaderConfig.builder().withFormatConfig(formatConfig).build(); 
assertEquals(formatConfig.isStringsSignedMinMaxEnabled(), readerConfig.enableStringsSignedMinMax()); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestPcapEVFReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestPcapEVFReader.java index bc6eff9..5796fa8 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestPcapEVFReader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestPcapEVFReader.java @@ -34,7 +34,7 @@ public class TestPcapEVFReader extends ClusterTest { @BeforeClass public static void setup() throws Exception { ClusterTest.startCluster(ClusterFixture.builder(dirTestWatcher)); - cluster.defineFormat("cp", "sample", new PcapFormatConfig()); + cluster.defineFormat("cp", "sample", new PcapFormatConfig(null, null)); } @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestSessionizePCAP.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestSessionizePCAP.java index e0a0c6c..addc1a1 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestSessionizePCAP.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/pcap/TestSessionizePCAP.java @@ -43,9 +43,7 @@ public class TestSessionizePCAP extends ClusterTest { public static void setup() throws Exception { ClusterTest.startCluster(ClusterFixture.builder(dirTestWatcher)); - PcapFormatConfig sampleConfig = new PcapFormatConfig(); - sampleConfig.sessionizeTCPStreams = true; - + PcapFormatConfig sampleConfig = new PcapFormatConfig(null, true); cluster.defineFormat("cp", "pcap", sampleConfig); dirTestWatcher.copyResourceToRoot(Paths.get("store/pcap/")); } diff --git a/logical/src/main/java/org/apache/drill/common/JSONOptions.java b/logical/src/main/java/org/apache/drill/common/JSONOptions.java index dcb4700..5bfdb56 100644 --- a/logical/src/main/java/org/apache/drill/common/JSONOptions.java +++ 
b/logical/src/main/java/org/apache/drill/common/JSONOptions.java @@ -138,6 +138,7 @@ public class JSONOptions { return root; } + @SuppressWarnings("serial") public static class De extends StdDeserializer<JSONOptions> { public De() { @@ -158,9 +159,9 @@ public class JSONOptions { throw new IllegalArgumentException(String.format("Received something other than a JsonNode %s", n)); } } - } + @SuppressWarnings("serial") public static class Se extends StdSerializer<JSONOptions> { public Se() { @@ -175,9 +176,6 @@ public class JSONOptions { } else { jgen.writeTree(value.root); } - } - } - }