[GitHub] spark pull request #14313: [SPARK-16674][SQL] Avoid per-record type dispatch...

jaceklaskowski Sat, 23 Jul 2016 12:53:57 -0700

Github user jaceklaskowski commented on a diff in the pull request:

    https://github.com/apache/spark/pull/14313#discussion_r71977344
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala ---
    @@ -322,46 +322,134 @@ private[sql] class JDBCRDD(
         }
       }
     
    -  // Each JDBC-to-Catalyst conversion corresponds to a tag defined here so 
that
    -  // we don't have to potentially poke around in the Metadata once for 
every
    -  // row.
    -  // Is there a better way to do this?  I'd rather be using a type that
    -  // contains only the tags I define.
    -  abstract class JDBCConversion
    -  case object BooleanConversion extends JDBCConversion
    -  case object DateConversion extends JDBCConversion
    -  case class  DecimalConversion(precision: Int, scale: Int) extends 
JDBCConversion
    -  case object DoubleConversion extends JDBCConversion
    -  case object FloatConversion extends JDBCConversion
    -  case object IntegerConversion extends JDBCConversion
    -  case object LongConversion extends JDBCConversion
    -  case object BinaryLongConversion extends JDBCConversion
    -  case object StringConversion extends JDBCConversion
    -  case object TimestampConversion extends JDBCConversion
    -  case object BinaryConversion extends JDBCConversion
    -  case class ArrayConversion(elementConversion: JDBCConversion) extends 
JDBCConversion
    +  // A `JDBCConversion` is responsible for converting a value from 
`ResultSet`
    +  // to a value in a field for `InternalRow`.
    +  private type JDBCConversion = (ResultSet, Int) => Any
    +
    +  // This `ArrayElementConversion` is responsible for converting elements 
in
    +  // an array from `ResultSet`.
    +  private type ArrayElementConversion = (Object) => Any
     
       /**
    -   * Maps a StructType to a type tag list.
    +   * Maps a StructType to conversions for each type.
        */
       def getConversions(schema: StructType): Array[JDBCConversion] =
         schema.fields.map(sf => getConversions(sf.dataType, sf.metadata))
     
       private def getConversions(dt: DataType, metadata: Metadata): 
JDBCConversion = dt match {
    -    case BooleanType => BooleanConversion
    -    case DateType => DateConversion
    -    case DecimalType.Fixed(p, s) => DecimalConversion(p, s)
    -    case DoubleType => DoubleConversion
    -    case FloatType => FloatConversion
    -    case IntegerType => IntegerConversion
    -    case LongType => if (metadata.contains("binarylong")) 
BinaryLongConversion else LongConversion
    -    case StringType => StringConversion
    -    case TimestampType => TimestampConversion
    -    case BinaryType => BinaryConversion
    -    case ArrayType(et, _) => ArrayConversion(getConversions(et, metadata))
    +    case BooleanType =>
    +      (rs: ResultSet, pos: Int) => rs.getBoolean(pos)
    +
    +    case DateType =>
    +      (rs: ResultSet, pos: Int) =>
    +        // DateTimeUtils.fromJavaDate does not handle null value, so we 
need to check it.
    +        val dateVal = rs.getDate(pos)
    +        if (dateVal != null) {
    +          DateTimeUtils.fromJavaDate(dateVal)
    +        } else {
    +          null
    +        }
    +
    +    case DecimalType.Fixed(p, s) =>
    +      (rs: ResultSet, pos: Int) =>
    +        val decimalVal = rs.getBigDecimal(pos)
    +        if (decimalVal == null) {
    +          null
    +        } else {
    +          Decimal(decimalVal, p, s)
    +        }
    +
    +    case DoubleType =>
    +      (rs: ResultSet, pos: Int) => rs.getDouble(pos)
    +
    +    case FloatType =>
    +      (rs: ResultSet, pos: Int) => rs.getFloat(pos)
    +
    +    case IntegerType =>
    +      (rs: ResultSet, pos: Int) => rs.getInt(pos)
    +
    +    case LongType if metadata.contains("binarylong") =>
    +      (rs: ResultSet, pos: Int) =>
    +        val bytes = rs.getBytes(pos)
    +        var ans = 0L
    +        var j = 0
    +        while (j < bytes.size) {
    +          ans = 256 * ans + (255 & bytes(j))
    +          j = j + 1
    +        }
    +        ans
    +
    +    case LongType =>
    +      (rs: ResultSet, pos: Int) => rs.getLong(pos)
    +
    +    case StringType =>
    +      (rs: ResultSet, pos: Int) =>
    +        // TODO(davies): use getBytes for better performance, if the 
encoding is UTF-8
    +        UTF8String.fromString(rs.getString(pos))
    +
    +    case TimestampType =>
    +      (rs: ResultSet, pos: Int) =>
    +        val t = rs.getTimestamp(pos)
    +        if (t != null) {
    +          DateTimeUtils.fromJavaTimestamp(t)
    +        } else {
    +          null
    +        }
    +
    +    case BinaryType =>
    +      (rs: ResultSet, pos: Int) => rs.getBytes(pos)
    +
    +    case ArrayType(et, _) =>
    +      val elementConversion: ArrayElementConversion = 
getArrayElementConversion(et, metadata)
    +      (rs: ResultSet, pos: Int) =>
    +        val array = rs.getArray(pos).getArray
    +        if (array != null) {
    +          val data = elementConversion.apply(array)
    +          new GenericArrayData(data)
    +        } else {
    +          null
    +        }
    +
         case _ => throw new IllegalArgumentException(s"Unsupported type 
${dt.simpleString}")
       }
     
    +  private def getArrayElementConversion(
    +      dt: DataType,
    +      metadata: Metadata): ArrayElementConversion = {
    +    dt match {
    +      case TimestampType =>
    +        (array: Object) =>
    +          array.asInstanceOf[Array[java.sql.Timestamp]].map { timestamp =>
    +            nullSafeConvert(timestamp, DateTimeUtils.fromJavaTimestamp)
    +          }
    +
    +      case StringType =>
    +        (array: Object) =>
    +          array.asInstanceOf[Array[java.lang.String]]
    +            .map(UTF8String.fromString)
    +
    +      case DateType =>
    +        (array: Object) =>
    +          array.asInstanceOf[Array[java.sql.Date]].map { date =>
    +            nullSafeConvert(date, DateTimeUtils.fromJavaDate)
    +          }
    +
    +      case dt: DecimalType =>
    +        (array: Object) =>
    +          array.asInstanceOf[Array[java.math.BigDecimal]].map { decimal =>
    +            nullSafeConvert[java.math.BigDecimal](decimal, d => Decimal(d, 
dt.precision, dt.scale))
    +          }
    +
    +      case LongType if metadata.contains("binarylong") =>
    +        throw new IllegalArgumentException(s"Unsupported array element 
conversion.")
    --- End diff --
    
    Can you add more context to the exception message, e.g. which array element type is unsupported?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not
working, please contact infrastructure at infrastruct...@apache.org or file
a JIRA ticket with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #14313: [SPARK-16674][SQL] Avoid per-record type dispatch...

Reply via email to