[ 
https://issues.apache.org/jira/browse/DRILL-5356?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15947291#comment-15947291
 ] 

ASF GitHub Bot commented on DRILL-5356:
---------------------------------------

Github user ppadma commented on a diff in the pull request:

    https://github.com/apache/drill/pull/789#discussion_r108667453
  
    --- Diff: 
exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetInternalsTest.java
 ---
    @@ -0,0 +1,154 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.store.parquet;
    +
    +import static org.junit.Assert.*;
    +
    +import java.util.HashMap;
    +import java.util.Map;
    +
    +import org.apache.drill.TestBuilder;
    +import org.apache.drill.common.expression.SchemaPath;
    +import org.apache.drill.common.types.TypeProtos;
    +import org.apache.drill.common.types.Types;
    +import org.apache.drill.test.ClusterFixture;
    +import org.apache.drill.test.ClusterTest;
    +import org.apache.drill.test.FixtureBuilder;
    +import org.junit.BeforeClass;
    +import org.junit.Test;
    +
    +public class ParquetInternalsTest extends ClusterTest {
    +
    +  @BeforeClass
    +  public static void setup( ) throws Exception {
    +    FixtureBuilder builder = ClusterFixture.builder()
    +      // Set options, etc.
    +      ;
    +    startCluster(builder);
    +  }
    +
    +  @Test
    +  public void testFixedWidth() throws Exception {
    +    String sql = "SELECT l_orderkey, l_partkey, l_suppkey, l_linenumber, 
l_quantity\n" +
    +                 "FROM `cp`.`tpch/lineitem.parquet` LIMIT 20";
    +//    client.queryBuilder().sql(sql).printCsv();
    +
    +    Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap<>();
    +    typeMap.put(TestBuilder.parsePath("l_orderkey"), 
Types.required(TypeProtos.MinorType.INT));
    +    typeMap.put(TestBuilder.parsePath("l_partkey"), 
Types.required(TypeProtos.MinorType.INT));
    +    typeMap.put(TestBuilder.parsePath("l_suppkey"), 
Types.required(TypeProtos.MinorType.INT));
    +    typeMap.put(TestBuilder.parsePath("l_linenumber"), 
Types.required(TypeProtos.MinorType.INT));
    +    typeMap.put(TestBuilder.parsePath("l_quantity"), 
Types.required(TypeProtos.MinorType.FLOAT8));
    +    client.testBuilder()
    +      .sqlQuery(sql)
    +      .unOrdered()
    +      .csvBaselineFile("parquet/expected/fixedWidth.csv")
    +      .baselineColumns("l_orderkey", "l_partkey", "l_suppkey", 
"l_linenumber", "l_quantity")
    +      .baselineTypes(typeMap)
    +      .build()
    +      .run();
    +  }
    +
    +
    +  @Test
    +  public void testVariableWidth() throws Exception {
    +    String sql = "SELECT s_name, s_address, s_phone, s_comment\n" +
    +                 "FROM `cp`.`tpch/supplier.parquet` LIMIT 20";
    +    client.queryBuilder().sql(sql).printCsv();
    --- End diff --
    
    Do you want to comment out this line?


> Refactor Parquet Record Reader
> ------------------------------
>
>                 Key: DRILL-5356
>                 URL: https://issues.apache.org/jira/browse/DRILL-5356
>             Project: Apache Drill
>          Issue Type: Improvement
>    Affects Versions: 1.10.0, 1.11.0
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>            Priority: Minor
>             Fix For: 1.11.0
>
>
> The Parquet record reader class is a key part of Drill that has evolved over 
> time to become somewhat hard to follow.
> A number of us are working on Parquet-related tasks and find we have to spend 
> an uncomfortable amount of time trying to understand the code. In particular, 
> this writer needs to figure out how to convince the reader to provide 
> higher-density record batches.
> Rather than continue to decipher the complex code multiple times, this ticket 
> requests that the code be refactored so that it remains functionally identical 
> to the current implementation but becomes structurally cleaner. The result will 
> be faster time to value when working with this code.
> This is a lower-priority change and will be coordinated with others working 
> on this code base. This ticket is only for the record reader class itself; it 
> does not include the various readers and writers that Parquet uses since 
> another project is actively modifying those classes.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to