[GitHub] carbondata pull request #2366: [CARBONDATA-2532][Integration] Carbon to supp...

jackylk Tue, 19 Jun 2018 00:02:01 -0700

Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2366#discussion_r196313064
  
    --- Diff: 
integration/spark2/src/main/spark2.1/org/apache/spark/sql/CarbonVectorProxy.java
 ---
    @@ -0,0 +1,240 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.spark.sql;
    +
    +import java.math.BigInteger;
    +
    +import org.apache.carbondata.spark.vectorreader.CarbonSparkVectorReader;
    +import org.apache.spark.memory.MemoryMode;
    +import org.apache.spark.sql.catalyst.InternalRow;
    +import org.apache.spark.sql.execution.vectorized.ColumnVector;
    +import org.apache.spark.sql.execution.vectorized.ColumnarBatch;
    +import org.apache.spark.sql.types.CalendarIntervalType;
    +import org.apache.spark.sql.types.DataType;
    +import org.apache.spark.sql.types.Decimal;
    +import org.apache.spark.sql.types.DecimalType;
    +import org.apache.spark.sql.types.StructField;
    +import org.apache.spark.sql.types.StructType;
    +import org.apache.spark.unsafe.types.CalendarInterval;
    +import org.apache.spark.unsafe.types.UTF8String;
    +
    +public class CarbonVectorProxy implements CarbonSparkVectorReader {
    +
    +    private ColumnVector columnVector;
    +    private ColumnarBatch columnarBatch;
    +
    +    /**
    +     * Adapter class which handles the columnar vector reading of the 
carbondata
    +     * based on the spark ColumnVector and ColumnarBatch API. This proxy 
class
    +     * handles the complexity of spark 2.3 version related api changes 
since
    +     * spark ColumnVector and ColumnarBatch interfaces are still evolving.
    +     *
    +     * @param memMode       memory mode of the vector, either on-heap or off-heap.
    +     * @param rowNum        number of rows for vector reading.
    +     * @param structFileds  metadata related to the current schema of the table.
    +     */
    +    public CarbonVectorProxy(MemoryMode memMode, int rowNum, StructField[] 
structFileds) {
    +        columnarBatch = ColumnarBatch.allocate(new 
StructType(structFileds), memMode, rowNum);
    +    }
    +
    +    public CarbonVectorProxy(MemoryMode memMode, StructType outputSchema, 
int rowNum) {
    +        columnarBatch = ColumnarBatch.allocate(outputSchema, memMode, 
rowNum);
    +    }
    +
    +    /**
    +     * Sets the number of rows in this batch.
    +     */
    +    public void setNumRows(int numRows) {
    +        columnarBatch.setNumRows(numRows);
    +    }
    +
    +    /**
    +     * Returns the number of rows for read, including filtered rows.
    +     */
    +    public int numRows() {
    +        return columnarBatch.capacity();
    +    }
    +
    +    /**
    +     * Called to close all the columns in this batch. It is not valid to 
access the data after
    +     * calling this. This must be called at the end to clean up memory 
allocations.
    +     */
    +    public void close() {
    +        columnarBatch.close();
    +    }
    +
    +    /**
    +     * Returns the row in this batch at `rowId`. Returned row is reused 
across calls.
    +     */
    +    public InternalRow getRow(int rowId) {
    +        return columnarBatch.getRow(rowId);
    +    }
    +
    +    /**
    +     * Returns the underlying columnar batch held by this proxy.
    +     */
    +    public Object getColumnarBatch() {
    +        return columnarBatch;
    +    }
    +
    +    /**
    +     * Resets this batch for writing. The currently stored values are no 
longer accessible.
    +     */
    +    public void reset() {
    +        columnarBatch.reset();
    +    }
    +
    +
    --- End diff --
    
    Please remove the extra empty line.

---

[GitHub] carbondata pull request #2366: [CARBONDATA-2532][Integration] Carbon to supp...

Reply via email to