Github user rdblue commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21556#discussion_r201756667
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala
 ---
    @@ -37,41 +39,64 @@ import org.apache.spark.unsafe.types.UTF8String
     /**
      * Some utility function to convert Spark data source filters to Parquet 
filters.
      */
    -private[parquet] class ParquetFilters(pushDownDate: Boolean, 
pushDownStartWith: Boolean) {
    +private[parquet] class ParquetFilters(
    +    pushDownDate: Boolean,
    +    pushDownDecimal: Boolean,
    +    pushDownStartWith: Boolean) {
     
       private case class ParquetSchemaType(
           originalType: OriginalType,
           primitiveTypeName: PrimitiveTypeName,
    -      decimalMetadata: DecimalMetadata)
    -
    -  private val ParquetBooleanType = ParquetSchemaType(null, BOOLEAN, null)
    -  private val ParquetByteType = ParquetSchemaType(INT_8, INT32, null)
    -  private val ParquetShortType = ParquetSchemaType(INT_16, INT32, null)
    -  private val ParquetIntegerType = ParquetSchemaType(null, INT32, null)
    -  private val ParquetLongType = ParquetSchemaType(null, INT64, null)
    -  private val ParquetFloatType = ParquetSchemaType(null, FLOAT, null)
    -  private val ParquetDoubleType = ParquetSchemaType(null, DOUBLE, null)
    -  private val ParquetStringType = ParquetSchemaType(UTF8, BINARY, null)
    -  private val ParquetBinaryType = ParquetSchemaType(null, BINARY, null)
    -  private val ParquetDateType = ParquetSchemaType(DATE, INT32, null)
    +      length: Int,
    +      decimalMeta: DecimalMetadata)
    +
    +  private val ParquetBooleanType = ParquetSchemaType(null, BOOLEAN, 0, 
null)
    +  private val ParquetByteType = ParquetSchemaType(INT_8, INT32, 0, null)
    +  private val ParquetShortType = ParquetSchemaType(INT_16, INT32, 0, null)
    +  private val ParquetIntegerType = ParquetSchemaType(null, INT32, 0, null)
    +  private val ParquetLongType = ParquetSchemaType(null, INT64, 0, null)
    +  private val ParquetFloatType = ParquetSchemaType(null, FLOAT, 0, null)
    +  private val ParquetDoubleType = ParquetSchemaType(null, DOUBLE, 0, null)
    +  private val ParquetStringType = ParquetSchemaType(UTF8, BINARY, 0, null)
    +  private val ParquetBinaryType = ParquetSchemaType(null, BINARY, 0, null)
    +  private val ParquetDateType = ParquetSchemaType(DATE, INT32, 0, null)
     
       private def dateToDays(date: Date): SQLDate = {
         DateTimeUtils.fromJavaDate(date)
       }
     
    +  private def decimalToInt32(decimal: JBigDecimal): Integer = 
decimal.unscaledValue().intValue()
    +
    +  private def decimalToInt64(decimal: JBigDecimal): JLong = 
decimal.unscaledValue().longValue()
    +
    +  private def decimalToByteArray(decimal: JBigDecimal, numBytes: Int): 
Binary = {
    +    val decimalBuffer = new Array[Byte](numBytes)
    +    val bytes = decimal.unscaledValue().toByteArray
    +
    +    val fixedLengthBytes = if (bytes.length == numBytes) {
    +      bytes
    +    } else {
    +      val signByte = if (bytes.head < 0) -1: Byte else 0: Byte
    +      java.util.Arrays.fill(decimalBuffer, 0, numBytes - bytes.length, 
signByte)
    +      System.arraycopy(bytes, 0, decimalBuffer, numBytes - bytes.length, 
bytes.length)
    +      decimalBuffer
    +    }
    +    Binary.fromReusedByteArray(fixedLengthBytes, 0, numBytes)
    --- End diff --
    
This byte array is not reused; it is allocated each time this function 
runs. This should use the `fromConstantByteArray` variant instead.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to