Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/14313#discussion_r72005487 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala --- @@ -322,46 +322,135 @@ private[sql] class JDBCRDD( } } - // Each JDBC-to-Catalyst conversion corresponds to a tag defined here so that - // we don't have to potentially poke around in the Metadata once for every - // row. - // Is there a better way to do this? I'd rather be using a type that - // contains only the tags I define. - abstract class JDBCConversion - case object BooleanConversion extends JDBCConversion - case object DateConversion extends JDBCConversion - case class DecimalConversion(precision: Int, scale: Int) extends JDBCConversion - case object DoubleConversion extends JDBCConversion - case object FloatConversion extends JDBCConversion - case object IntegerConversion extends JDBCConversion - case object LongConversion extends JDBCConversion - case object BinaryLongConversion extends JDBCConversion - case object StringConversion extends JDBCConversion - case object TimestampConversion extends JDBCConversion - case object BinaryConversion extends JDBCConversion - case class ArrayConversion(elementConversion: JDBCConversion) extends JDBCConversion + // A `JDBCConversion` is responsible for converting a value from `ResultSet` + // to a value in a field for `InternalRow`. + private type JDBCConversion = (ResultSet, Int) => Any + + // This `ArrayElementConversion` is responsible for converting elements in + // an array from `ResultSet`. + private type ArrayElementConversion = (Object) => Any /** - * Maps a StructType to a type tag list. + * Maps a StructType to conversions for each type. 
*/ def getConversions(schema: StructType): Array[JDBCConversion] = schema.fields.map(sf => getConversions(sf.dataType, sf.metadata)) private def getConversions(dt: DataType, metadata: Metadata): JDBCConversion = dt match { - case BooleanType => BooleanConversion - case DateType => DateConversion - case DecimalType.Fixed(p, s) => DecimalConversion(p, s) - case DoubleType => DoubleConversion - case FloatType => FloatConversion - case IntegerType => IntegerConversion - case LongType => if (metadata.contains("binarylong")) BinaryLongConversion else LongConversion - case StringType => StringConversion - case TimestampType => TimestampConversion - case BinaryType => BinaryConversion - case ArrayType(et, _) => ArrayConversion(getConversions(et, metadata)) + case BooleanType => + (rs: ResultSet, pos: Int) => rs.getBoolean(pos) + + case DateType => + (rs: ResultSet, pos: Int) => + // DateTimeUtils.fromJavaDate does not handle null value, so we need to check it. + val dateVal = rs.getDate(pos) + if (dateVal != null) { + DateTimeUtils.fromJavaDate(dateVal) + } else { + null + } + + case DecimalType.Fixed(p, s) => + (rs: ResultSet, pos: Int) => + val decimalVal = rs.getBigDecimal(pos) + if (decimalVal != null) { + Decimal(decimalVal, p, s) + } else { + null + } + + case DoubleType => + (rs: ResultSet, pos: Int) => rs.getDouble(pos) + + case FloatType => + (rs: ResultSet, pos: Int) => rs.getFloat(pos) + + case IntegerType => + (rs: ResultSet, pos: Int) => rs.getInt(pos) + + case LongType if metadata.contains("binarylong") => + (rs: ResultSet, pos: Int) => + val bytes = rs.getBytes(pos) + var ans = 0L + var j = 0 + while (j < bytes.size) { + ans = 256 * ans + (255 & bytes(j)) + j = j + 1 + } + ans + + case LongType => + (rs: ResultSet, pos: Int) => rs.getLong(pos) + + case StringType => + (rs: ResultSet, pos: Int) => + // TODO(davies): use getBytes for better performance, if the encoding is UTF-8 + UTF8String.fromString(rs.getString(pos)) + + case TimestampType => + (rs: 
ResultSet, pos: Int) => + val t = rs.getTimestamp(pos) + if (t != null) { + DateTimeUtils.fromJavaTimestamp(t) + } else { + null + } + + case BinaryType => + (rs: ResultSet, pos: Int) => rs.getBytes(pos) + + case ArrayType(et, _) => + val elementConversion: ArrayElementConversion = getArrayElementConversion(et, metadata) + (rs: ResultSet, pos: Int) => + val array = rs.getArray(pos).getArray + if (array != null) { + val data = elementConversion.apply(array) + new GenericArrayData(data) + } else { + null + } + case _ => throw new IllegalArgumentException(s"Unsupported type ${dt.simpleString}") } + private def getArrayElementConversion( --- End diff -- I'd like to inline this method (`getArrayElementConversion`), as the previous code did.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org