cgivre commented on a change in pull request #1778: DRILL-7233: Format Plugin for HDF5
URL: https://github.com/apache/drill/pull/1778#discussion_r363286780
 
 

 ##########
 File path: contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
 ##########
 @@ -0,0 +1,907 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.hdf5;
+
+import org.apache.drill.categories.RowSetTests;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.ExecTest;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.exec.store.dfs.ZipCodec;
+import org.apache.drill.test.ClusterTest;
+import org.apache.drill.exec.physical.rowSet.RowSet;
+import org.apache.drill.exec.physical.rowSet.RowSetBuilder;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+@Category(RowSetTests.class)
+public class TestHDF5Format extends ClusterTest {
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    ClusterTest.startCluster(ClusterFixture.builder(dirTestWatcher));
+
+    HDF5FormatConfig formatConfig = new HDF5FormatConfig();
+    cluster.defineFormat("dfs", "hdf5", formatConfig);
+    cluster.defineFormat("cp", "hdf5", formatConfig);
+    dirTestWatcher.copyResourceToRoot(Paths.get("hdf5/"));
+  }
+
+  @Test
+  public void testExplicitQuery() throws RpcException {
+    String sql = "SELECT path, data_type, file_name FROM cp.`hdf5/dset.h5`";
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("path", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL)
+      .add("data_type", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.OPTIONAL)
+      .add("file_name", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("/dset", "DATASET", "dset.h5")
+      .build();
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testStarQuery() throws Exception {
+    List<Integer> t1 = Arrays.asList(1, 2, 3, 4, 5, 6);
+    List<Integer> t2 = Arrays.asList(7, 8, 9, 10, 11, 12);
+    List<Integer> t3 = Arrays.asList(13, 14, 15, 16, 17, 18);
+    List<Integer> t4 = Arrays.asList(19, 20, 21, 22, 23, 24);
+    List<List<Integer>> finalList = new ArrayList<>();
+    finalList.add(t1);
+    finalList.add(t2);
+    finalList.add(t3);
+    finalList.add(t4);
+
+    testBuilder()
+      .sqlQuery("SELECT * FROM cp.`hdf5/dset.h5`")
+      .unOrdered()
+      .baselineColumns("path", "data_type", "file_name", "int_data")
+      .baselineValues("/dset", "DATASET", "dset.h5", finalList)
+      .go();
+  }
+
+  @Test
+  public void testSimpleExplicitQuery() throws Exception {
+    List<Integer> t1 = Arrays.asList(1, 2, 3, 4, 5, 6);
+    List<Integer> t2 = Arrays.asList(7, 8, 9, 10, 11, 12);
+    List<Integer> t3 = Arrays.asList(13, 14, 15, 16, 17, 18);
+    List<Integer> t4 = Arrays.asList(19, 20, 21, 22, 23, 24);
+    List<List<Integer>> finalList = new ArrayList<>();
+    finalList.add(t1);
+    finalList.add(t2);
+    finalList.add(t3);
+    finalList.add(t4);
+
+    testBuilder()
+      .sqlQuery("SELECT path, data_type, file_name, int_data FROM 
cp.`hdf5/dset.h5`")
+      .ordered()
+      .baselineColumns("path", "data_type", "file_name", "int_data")
+      .baselineValues("/dset", "DATASET", "dset.h5", finalList)
+      .go();
+  }
+
+  @Test
+  public void testFlattenColumnQuery() throws RpcException {
+    String sql = "SELECT data[0] AS col1,\n" +
+            "data[1] as col2,\n" +
+            "data[2] as col3\n" +
+            "FROM \n" +
+            "(\n" +
+            "SELECT FLATTEN(double_data) AS data \n" +
+            "FROM cp.`hdf5/browsing.h5` WHERE path='/groupB/dmat'\n" +
+            ")";
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("col1", TypeProtos.MinorType.FLOAT8, TypeProtos.DataMode.OPTIONAL)
+      .add("col2", TypeProtos.MinorType.FLOAT8, TypeProtos.DataMode.OPTIONAL)
+      .add("col3", TypeProtos.MinorType.FLOAT8, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(1.1, 2.2, 3.3)
+      .addRow(4.4, 5.5, 6.6)
+      .addRow(7.7, 8.8, 9.9)
+      .build();
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFilterWithNonProjectedFieldQuery() throws Exception {
+    String sql = "SELECT `path` FROM cp.`hdf5/browsing.h5` WHERE data_type='DATASET'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("path", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("/groupA/date")
+      .addRow("/groupA/string")
+      .addRow("/groupB/dmat")
+      .addRow("/groupB/inarr")
+      .build();
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFloat32ScalarQuery() throws Exception {
+    String sql = "SELECT flatten(float32) AS float_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/datatype/float32'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("float_col", TypeProtos.MinorType.FLOAT8, 
TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-3.4028234663852886E38)
+      .addRow(1.0)
+      .addRow(2.0)
+      .addRow(3.0)
+      .addRow(4.0)
+      .addRow(5.0)
+      .addRow(6.0)
+      .addRow(7.0)
+      .addRow(8.0)
+      .addRow(3.4028234663852886E38)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFlattenFloat32ScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/datatype/float32'))";
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("float32", TypeProtos.MinorType.FLOAT8, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-3.4028234663852886E38)
+      .addRow(1.0)
+      .addRow(2.0)
+      .addRow(3.0)
+      .addRow(4.0)
+      .addRow(5.0)
+      .addRow(6.0)
+      .addRow(7.0)
+      .addRow(8.0)
+      .addRow(3.4028234663852886E38)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFloat64ScalarQuery() throws Exception {
+    String sql = "SELECT flatten(float64) AS float_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/datatype/float64'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("float_col", TypeProtos.MinorType.FLOAT8, 
TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-1.7976931348623157E308)
+      .addRow(1.0)
+      .addRow(2.0)
+      .addRow(3.0)
+      .addRow(4.0)
+      .addRow(5.0)
+      .addRow(6.0)
+      .addRow(7.0)
+      .addRow(8.0)
+      .addRow(1.7976931348623157E308)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFlattenFloat64ScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/datatype/float64'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("float64", TypeProtos.MinorType.FLOAT8, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-1.7976931348623157E308)
+      .addRow(1.0)
+      .addRow(2.0)
+      .addRow(3.0)
+      .addRow(4.0)
+      .addRow(5.0)
+      .addRow(6.0)
+      .addRow(7.0)
+      .addRow(8.0)
+      .addRow(1.7976931348623157E308)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testInt32ScalarQuery() throws Exception {
+    String sql = "SELECT flatten(int32) AS int_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/datatype/int32'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col", TypeProtos.MinorType.INT, TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648)
+      .addRow(1)
+      .addRow(2)
+      .addRow(3)
+      .addRow(4)
+      .addRow(5)
+      .addRow(6)
+      .addRow(7)
+      .addRow(8)
+      .addRow(2147483647)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFlattenInt32ScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/datatype/int32'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int32", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648)
+      .addRow(1)
+      .addRow(2)
+      .addRow(3)
+      .addRow(4)
+      .addRow(5)
+      .addRow(6)
+      .addRow(7)
+      .addRow(8)
+      .addRow(2147483647)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testInt64ScalarQuery() throws Exception {
+    String sql = "SELECT flatten(int64) AS long_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/datatype/int64'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("long_col", TypeProtos.MinorType.BIGINT, 
TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-9223372036854775808L)
+      .addRow(1L)
+      .addRow(2L)
+      .addRow(3L)
+      .addRow(4L)
+      .addRow(5L)
+      .addRow(6L)
+      .addRow(7L)
+      .addRow(8L)
+      .addRow(9223372036854775807L)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFlattenInt64ScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/datatype/int64'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int64", TypeProtos.MinorType.BIGINT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-9223372036854775808L)
+      .addRow(1L)
+      .addRow(2L)
+      .addRow(3L)
+      .addRow(4L)
+      .addRow(5L)
+      .addRow(6L)
+      .addRow(7L)
+      .addRow(8L)
+      .addRow(9223372036854775807L)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testStringScalarQuery() throws Exception {
+    String sql = "SELECT flatten(s10) AS string_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/datatype/s10'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("string_col", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("a         ")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("abcdefghij")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testFlattenStringScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/datatype/s10'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("s10", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("a         ")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("abcdefghij")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testUnicodeScalarQuery() throws Exception {
+    String sql = "SELECT flatten(unicode) AS string_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/datatype/unicode'";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("string_col", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("a")
+      .addRow("Ελληνικά")
+      .addRow("日本語")
+      .addRow("العربية")
+      .addRow("экземпляр")
+      .addRow("סקרן")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("abcdefghij")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testUnicodeFlattenScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/datatype/unicode'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("unicode", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("a")
+      .addRow("Ελληνικά")
+      .addRow("日本語")
+      .addRow("العربية")
+      .addRow("экземпляр")
+      .addRow("סקרן")
+      .addRow("")
+      .addRow("")
+      .addRow("")
+      .addRow("abcdefghij")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test1DScalarQuery() throws Exception {
+    String sql = "SELECT FLATTEN(`1D`) AS int_col\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/nd/1D'\n" +
+            "LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col", TypeProtos.MinorType.INT, TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648)
+      .addRow(1)
+      .addRow(2)
+      .addRow(3)
+      .addRow(4)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test1DFlattenScalarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/scalar.h5` (type => 'hdf5', 
defaultPath => '/nd/1D')) LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("1D", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648)
+      .addRow(1)
+      .addRow(2)
+      .addRow(3)
+      .addRow(4)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test2DFlattenScalarQuery() throws Exception {
+    String sql = "SELECT int_col_0, int_col_1 FROM table(cp.`hdf5/scalar.h5` 
(type => 'hdf5', defaultPath => '/nd/2D'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col_0", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648, 1)
+      .addRow(10, 11)
+      .addRow(20, 21)
+      .addRow(30, 31)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test2DScalarQuery() throws Exception {
+    String sql = "SELECT int_data[0] AS col1,\n" +
+      "int_data[1] AS col2\n" +
+      "FROM\n" +
+      "(\n" +
+      "SELECT flatten(int_data) AS int_data\n" +
+      "FROM cp.`hdf5/scalar.h5`\n" +
+      "WHERE path='/nd/2D'\n" +
+      ") AS t1";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("col1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("col2", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648, 1)
+      .addRow(10, 11)
+      .addRow(20, 21)
+      .addRow(30, 31)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test3DScalarQuery() throws Exception {
+    String sql = "SELECT int_data[0] AS col1,\n" +
+      "int_data[1] AS col2\n" +
+      "FROM\n" +
+      "(\n" +
+      "SELECT flatten(int_data) AS int_data\n" +
+      "FROM cp.`hdf5/scalar.h5`\n" +
+      "WHERE path='/nd/3D'\n" +
+      ") AS t1";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("col1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("col2", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648, 1)
+      .addRow(2, 3)
+      .addRow(4, 5)
+      .addRow(6, 7)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test3DFlattenScalarQuery() throws Exception {
+    String sql = "SELECT int_col_0, int_col_1 FROM table(cp.`hdf5/scalar.h5` 
(type => 'hdf5', defaultPath => '/nd/3D'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col_0", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648, 1)
+      .addRow(2, 3)
+      .addRow(4, 5)
+      .addRow(6, 7)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test4DScalarQuery() throws Exception {
+    String sql = "SELECT int_data[0] AS col1,\n" +
+            "int_data[1] AS col2\n" +
+            "FROM\n" +
+            "(\n" +
+            "SELECT flatten(int_data) AS int_data\n" +
+            "FROM cp.`hdf5/scalar.h5`\n" +
+            "WHERE path='/nd/4D'\n" +
+            ") AS t1";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("col1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("col2", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648, 1)
+      .addRow(2, 3)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void test4DFlattenScalarQuery() throws Exception {
+    String sql = "SELECT int_col_0, int_col_1 FROM table(cp.`hdf5/scalar.h5` 
(type => 'hdf5', defaultPath => '/nd/4D')) LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col_0", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(-2147483648, 1)
+      .addRow(2, 3)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testNonScalarIntQuery() throws Exception {
+    String sql = "SELECT flatten(t1.compound_data.`field 1`) as field_1\n" +
+            "FROM cp.`hdf5/non-scalar.h5` AS t1\n" +
+            "LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("field_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(0)
+      .addRow(1)
+      .addRow(2)
+      .addRow(3)
+      .addRow(4)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testNonScalarFloatQuery() throws Exception {
+    String sql = "SELECT flatten(t1.compound_data.`field 2`) as field_2\n" +
+            "FROM cp.`hdf5/non-scalar.h5` AS t1\n" +
+            "LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("field_2", TypeProtos.MinorType.FLOAT8, 
TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(0.0)
+      .addRow(1.0)
+      .addRow(2.0)
+      .addRow(3.0)
+      .addRow(4.0)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testNonScalarStringQuery() throws Exception {
+    String sql = "SELECT flatten(t1.compound_data.`field 3`) as field_3\n" +
+            "FROM cp.`hdf5/non-scalar.h5` AS t1\n" +
+            "LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("field_3", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.REQUIRED)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("0")
+      .addRow("1")
+      .addRow("2")
+      .addRow("3")
+      .addRow("4")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testAttributes() throws Exception {
+    String sql = "SELECT path, file_name\n" +
+            "FROM cp.`hdf5/browsing.h5` AS t1 WHERE t1.attributes.`important` 
= false";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("path", TypeProtos.MinorType.VARCHAR, TypeProtos.DataMode.OPTIONAL)
+      .add("file_name", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("/groupB", "browsing.h5")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testStarProjectDatasetQuery() throws Exception {
+    String sql = "SELECT * \n"+
+      "FROM \n" +
+      "table(cp.`hdf5/dset.h5` (type => 'hdf5', defaultPath => '/dset'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col_0", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_2", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_3", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_4", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_5", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(1, 2, 3, 4, 5, 6)
+      .addRow(7, 8, 9, 10, 11, 12)
+      .addRow(13, 14, 15, 16, 17, 18)
+      .addRow(19, 20, 21, 22, 23, 24)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testExplicitProjectDatasetQuery() throws Exception {
+    String sql = "SELECT int_col_0, int_col_1, int_col_2, int_col_3, 
int_col_4\n"+
+      "FROM \n" +
+      "table(cp.`hdf5/dset.h5` (type => 'hdf5', defaultPath => '/dset'))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("int_col_0", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_2", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_3", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("int_col_4", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(1, 2, 3, 4, 5)
+      .addRow(7, 8, 9, 10, 11)
+      .addRow(13, 14, 15, 16, 17)
+      .addRow(19, 20, 21, 22, 23)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testCompoundStarQuery() throws Exception {
+    String sql = "SELECT * FROM table(cp.`hdf5/non-scalar.h5` (type => 'hdf5', defaultPath => '/compound')) LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("field_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("field_2", TypeProtos.MinorType.FLOAT8, 
TypeProtos.DataMode.OPTIONAL)
+      .add("field_3", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(0, 0.0, "0")
+      .addRow(1, 1.0, "1")
+      .addRow(2, 2.0, "2")
+      .addRow(3, 3.0, "3")
+      .addRow(4, 4.0, "4")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testCompoundExplicitQuery() throws Exception {
+    String sql = "SELECT `field_1`, `field_3` FROM table(cp.`hdf5/non-scalar.h5` (type => 'hdf5', defaultPath => '/compound')) LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("field_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .add("field_3", TypeProtos.MinorType.VARCHAR, 
TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(0, "0")
+      .addRow(1, "1")
+      .addRow(2, "2")
+      .addRow(3, "3")
+      .addRow(4, "4")
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testCompoundExplicitQuery2() throws Exception {
+    String sql = "SELECT `field_1` FROM table(cp.`hdf5/non-scalar.h5` (type => 'hdf5', defaultPath => '/compound')) LIMIT 5";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("field_1", TypeProtos.MinorType.INT, TypeProtos.DataMode.OPTIONAL)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(0)
+      .addRow(1)
+      .addRow(2)
+      .addRow(3)
+      .addRow(4)
+      .build();
+
+    new RowSetComparison(expected).unorderedVerifyAndClearAll(results);
+  }
+
+  @Test
+  public void testSerDe() throws Exception {
+    String sql = "SELECT COUNT(*) FROM cp.`hdf5/dset.h5`";
+    String plan = queryBuilder().sql(sql).explainJson();
+    long cnt = queryBuilder().physical(plan).singletonLong();
+    assertEquals("Counts should match", 1L, cnt);
+  }
+
+  @Test
+  public void testExplicitQueryWithCompressedFile() throws RpcException, IOException {
 
 Review comment:
   I'll clarify this in the documentation, but HDF5 is like a mini-filesystem: it supports compressed fields within that filesystem, which the plugin does not yet support reading. Drill WILL, however, read a compressed HDF5 file as a whole, which is what this unit test covers.
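   
   For context, a test along these lines might look like the sketch below. It is only illustrative, not the actual code in this PR: it assumes the compressed copy is staged with Hadoop's CompressionCodecFactory (gzip resolved from the ".gz" suffix is standard Hadoop behavior), and the resource path, file names, and the dirTestWatcher.getRootDir() helper are placeholders.
   
     @Test
     public void testCompressedFileSketch() throws Exception {
       // Illustrative sketch only: names and paths are assumptions.
       // Gzip a copy of the HDF5 test file so that Drill's filesystem layer,
       // not the HDF5 reader itself, performs the decompression.
       Configuration conf = new Configuration();
       CompressionCodecFactory factory = new CompressionCodecFactory(conf);
       // Standard Hadoop lookup: resolves the gzip codec from the ".gz" suffix.
       CompressionCodec codec = factory.getCodec(new Path("dset.h5.gz"));
   
       java.io.File gzFile = new java.io.File(dirTestWatcher.getRootDir(), "dset.h5.gz");
       try (InputStream in = new FileInputStream("src/test/resources/hdf5/dset.h5");
            OutputStream out = codec.createOutputStream(new java.io.FileOutputStream(gzFile))) {
         IOUtils.copyBytes(in, out, 4096, false);
       }
   
       // Drill strips the gzip wrapper before the HDF5 reader sees the bytes,
       // so the same metadata query works against the compressed copy.
       String sql = "SELECT path, data_type, file_name FROM dfs.`dset.h5.gz`";
       RowSet results = client.queryBuilder().sql(sql).rowSet();
       assertNotNull(results);
       results.clear();
     }
   
   The point of the sketch is the division of labor: whole-file compression is handled transparently by the dfs storage layer, while field-level compression inside the HDF5 container would have to be handled by the plugin itself.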

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
