Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]

2024-05-07 Thread via GitHub


viirya commented on code in PR #39615:
URL: https://github.com/apache/spark/pull/39615#discussion_r1593353161


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala:
##
@@ -166,317 +148,58 @@ object JavaTypeInference {
       .filter(_.getReadMethod != null)
   }
 
-  private def getJavaBeanReadableAndWritableProperties(
-      beanClass: Class[_]): Array[PropertyDescriptor] = {
-    getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null)
-  }
-
-  private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
-    val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]])
-    val iteratorType = iterableSuperType.resolveType(iteratorReturnType)
-    iteratorType.resolveType(nextReturnType)
-  }
-
-  private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]]
-    val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]])
-    val keyType = elementType(mapSuperType.resolveType(keySetReturnType))
-    val valueType = elementType(mapSuperType.resolveType(valuesReturnType))
-    keyType -> valueType
-  }
-
-  /**
-   * Returns the Spark SQL DataType for a given java class.  Where this is not an exact mapping
-   * to a native type, an ObjectType is returned.
-   *
-   * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type
-   * system.  As a result, ObjectType will be returned for things like boxed Integers.
-   */
-  private def inferExternalType(cls: Class[_]): DataType = cls match {
-    case c if c == java.lang.Boolean.TYPE => BooleanType
-    case c if c == java.lang.Byte.TYPE => ByteType
-    case c if c == java.lang.Short.TYPE => ShortType
-    case c if c == java.lang.Integer.TYPE => IntegerType
-    case c if c == java.lang.Long.TYPE => LongType
-    case c if c == java.lang.Float.TYPE => FloatType
-    case c if c == java.lang.Double.TYPE => DoubleType
-    case c if c == classOf[Array[Byte]] => BinaryType
-    case _ => ObjectType(cls)
-  }
-
-  /**
-   * Returns an expression that can be used to deserialize a Spark SQL representation to an object
-   * of java bean `T` with a compatible schema.  The Spark SQL representation is located at ordinal
-   * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed
-   * using `UnresolvedExtractValue`.
-   */
-  def deserializerFor(beanClass: Class[_]): Expression = {
-    val typeToken = TypeToken.of(beanClass)
-    val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName)
-    val (dataType, nullable) = inferDataType(typeToken)
-
-    // Assumes we are deserializing the first column of a row.
-    deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType,
-      nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath))
-  }
-
-  private def deserializerFor(
-      typeToken: TypeToken[_],
-      path: Expression,
-      walkedTypePath: WalkedTypePath): Expression = {
-    typeToken.getRawType match {
-      case c if !inferExternalType(c).isInstanceOf[ObjectType] => path
-
-      case c if c == classOf[java.lang.Short] ||
-        c == classOf[java.lang.Integer] ||
-        c == classOf[java.lang.Long] ||
-        c == classOf[java.lang.Double] ||
-        c == classOf[java.lang.Float] ||
-        c == classOf[java.lang.Byte] ||
-        c == classOf[java.lang.Boolean] =>
-        createDeserializerForTypesSupportValueOf(path, c)
-
-      case c if c == classOf[java.time.LocalDate] =>
-        createDeserializerForLocalDate(path)
-
-      case c if c == classOf[java.sql.Date] =>
-        createDeserializerForSqlDate(path)
-
-      case c if c == classOf[java.time.Instant] =>
-        createDeserializerForInstant(path)
-
-      case c if c == classOf[java.sql.Timestamp] =>
-        createDeserializerForSqlTimestamp(path)
+  private class ImplementsGenericInterface(interface: Class[_]) {
+    assert(interface.isInterface)
+    assert(interface.getTypeParameters.nonEmpty)
 
-      case c if c == classOf[java.time.LocalDateTime] =>
-        createDeserializerForLocalDateTime(path)
-
-      case c if c == classOf[java.time.Duration] =>
-        createDeserializerForDuration(path)
-
-      case c if c == classOf[java.time.Period] =>
-        createDeserializerForPeriod(path)
-
-      case c if c == classOf[java.lang.String] =>
-        createDeserializerForString(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigDecimal] =>
-        createDeserializerForJavaBigDecimal(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigInteger] =>
-        createDeserializerForJavaBigInteger(path, returnNullable = true)
-
-      case c if c.isArray =>
-        val

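For context on the hunk above: the removed elementType helper resolves a container's element type by viewing the class as java.lang.Iterable and following the generic return types of iterator() and next(). A minimal standalone sketch of that technique with Guava's TypeToken; the iteratorReturnType/nextReturnType method references are reconstructed here via reflection and are assumed equivalents of the private vals the removed code references, not the original fields:

```scala
import java.lang.{Iterable => JIterable}
import com.google.common.reflect.TypeToken

object ElementTypeSketch {
  // Assumed reflective equivalents of the cached vals used by the removed code.
  private val iteratorReturnType =
    classOf[JIterable[_]].getMethod("iterator").getGenericReturnType
  private val nextReturnType =
    classOf[java.util.Iterator[_]].getMethod("next").getGenericReturnType

  // Mirrors the removed elementType: view the token as java.lang.Iterable,
  // then resolve the element type through iterator()/next().
  def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
    val iterableType = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
      .getSupertype(classOf[JIterable[_]])
    iterableType.resolveType(iteratorReturnType).resolveType(nextReturnType)
  }

  def main(args: Array[String]): Unit = {
    // The anonymous subclass captures the full generic type argument.
    println(elementType(new TypeToken[java.util.List[String]]() {}))  // java.lang.String
  }
}
```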
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]

2024-05-07 Thread via GitHub


hvanhovell commented on code in PR #39615:
URL: https://github.com/apache/spark/pull/39615#discussion_r1592553906


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala:
##
@@ -166,317 +148,58 @@ object JavaTypeInference {
       .filter(_.getReadMethod != null)
   }
 
-  private def getJavaBeanReadableAndWritableProperties(
-      beanClass: Class[_]): Array[PropertyDescriptor] = {
-    getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null)
-  }
-
-  private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
-    val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]])
-    val iteratorType = iterableSuperType.resolveType(iteratorReturnType)
-    iteratorType.resolveType(nextReturnType)
-  }
-
-  private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]]
-    val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]])
-    val keyType = elementType(mapSuperType.resolveType(keySetReturnType))
-    val valueType = elementType(mapSuperType.resolveType(valuesReturnType))
-    keyType -> valueType
-  }
-
-  /**
-   * Returns the Spark SQL DataType for a given java class.  Where this is not an exact mapping
-   * to a native type, an ObjectType is returned.
-   *
-   * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type
-   * system.  As a result, ObjectType will be returned for things like boxed Integers.
-   */
-  private def inferExternalType(cls: Class[_]): DataType = cls match {
-    case c if c == java.lang.Boolean.TYPE => BooleanType
-    case c if c == java.lang.Byte.TYPE => ByteType
-    case c if c == java.lang.Short.TYPE => ShortType
-    case c if c == java.lang.Integer.TYPE => IntegerType
-    case c if c == java.lang.Long.TYPE => LongType
-    case c if c == java.lang.Float.TYPE => FloatType
-    case c if c == java.lang.Double.TYPE => DoubleType
-    case c if c == classOf[Array[Byte]] => BinaryType
-    case _ => ObjectType(cls)
-  }
-
-  /**
-   * Returns an expression that can be used to deserialize a Spark SQL representation to an object
-   * of java bean `T` with a compatible schema.  The Spark SQL representation is located at ordinal
-   * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed
-   * using `UnresolvedExtractValue`.
-   */
-  def deserializerFor(beanClass: Class[_]): Expression = {
-    val typeToken = TypeToken.of(beanClass)
-    val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName)
-    val (dataType, nullable) = inferDataType(typeToken)
-
-    // Assumes we are deserializing the first column of a row.
-    deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType,
-      nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath))
-  }
-
-  private def deserializerFor(
-      typeToken: TypeToken[_],
-      path: Expression,
-      walkedTypePath: WalkedTypePath): Expression = {
-    typeToken.getRawType match {
-      case c if !inferExternalType(c).isInstanceOf[ObjectType] => path
-
-      case c if c == classOf[java.lang.Short] ||
-        c == classOf[java.lang.Integer] ||
-        c == classOf[java.lang.Long] ||
-        c == classOf[java.lang.Double] ||
-        c == classOf[java.lang.Float] ||
-        c == classOf[java.lang.Byte] ||
-        c == classOf[java.lang.Boolean] =>
-        createDeserializerForTypesSupportValueOf(path, c)
-
-      case c if c == classOf[java.time.LocalDate] =>
-        createDeserializerForLocalDate(path)
-
-      case c if c == classOf[java.sql.Date] =>
-        createDeserializerForSqlDate(path)
-
-      case c if c == classOf[java.time.Instant] =>
-        createDeserializerForInstant(path)
-
-      case c if c == classOf[java.sql.Timestamp] =>
-        createDeserializerForSqlTimestamp(path)
+  private class ImplementsGenericInterface(interface: Class[_]) {
+    assert(interface.isInterface)
+    assert(interface.getTypeParameters.nonEmpty)
 
-      case c if c == classOf[java.time.LocalDateTime] =>
-        createDeserializerForLocalDateTime(path)
-
-      case c if c == classOf[java.time.Duration] =>
-        createDeserializerForDuration(path)
-
-      case c if c == classOf[java.time.Period] =>
-        createDeserializerForPeriod(path)
-
-      case c if c == classOf[java.lang.String] =>
-        createDeserializerForString(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigDecimal] =>
-        createDeserializerForJavaBigDecimal(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigInteger] =>
-        createDeserializerForJavaBigInteger(path, returnNullable = true)
-
-      case c if c.isArray =>
-

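The removed doc comment above is worth illustrating: inferExternalType maps Java primitives and byte arrays to native Catalyst types, but deliberately leaves boxed wrappers as ObjectType. A self-contained, REPL-style copy of the removed method, followed by the two calls that show the difference:

```scala
import org.apache.spark.sql.types._

// Standalone copy of the removed inferExternalType: primitives and byte
// arrays map to native Catalyst types; everything else stays an ObjectType.
def inferExternalType(cls: Class[_]): DataType = cls match {
  case c if c == java.lang.Boolean.TYPE => BooleanType
  case c if c == java.lang.Byte.TYPE => ByteType
  case c if c == java.lang.Short.TYPE => ShortType
  case c if c == java.lang.Integer.TYPE => IntegerType
  case c if c == java.lang.Long.TYPE => LongType
  case c if c == java.lang.Float.TYPE => FloatType
  case c if c == java.lang.Double.TYPE => DoubleType
  case c if c == classOf[Array[Byte]] => BinaryType
  case _ => ObjectType(cls)
}

inferExternalType(java.lang.Integer.TYPE)      // IntegerType: primitive int
inferExternalType(classOf[java.lang.Integer])  // ObjectType: boxed Integer, as the doc comment says
```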
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]

2024-05-05 Thread via GitHub


viirya commented on code in PR #39615:
URL: https://github.com/apache/spark/pull/39615#discussion_r1590399660


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala:
##
@@ -166,317 +148,58 @@ object JavaTypeInference {
       .filter(_.getReadMethod != null)
   }
 
-  private def getJavaBeanReadableAndWritableProperties(
-      beanClass: Class[_]): Array[PropertyDescriptor] = {
-    getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null)
-  }
-
-  private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
-    val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]])
-    val iteratorType = iterableSuperType.resolveType(iteratorReturnType)
-    iteratorType.resolveType(nextReturnType)
-  }
-
-  private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]]
-    val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]])
-    val keyType = elementType(mapSuperType.resolveType(keySetReturnType))
-    val valueType = elementType(mapSuperType.resolveType(valuesReturnType))
-    keyType -> valueType
-  }
-
-  /**
-   * Returns the Spark SQL DataType for a given java class.  Where this is not an exact mapping
-   * to a native type, an ObjectType is returned.
-   *
-   * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type
-   * system.  As a result, ObjectType will be returned for things like boxed Integers.
-   */
-  private def inferExternalType(cls: Class[_]): DataType = cls match {
-    case c if c == java.lang.Boolean.TYPE => BooleanType
-    case c if c == java.lang.Byte.TYPE => ByteType
-    case c if c == java.lang.Short.TYPE => ShortType
-    case c if c == java.lang.Integer.TYPE => IntegerType
-    case c if c == java.lang.Long.TYPE => LongType
-    case c if c == java.lang.Float.TYPE => FloatType
-    case c if c == java.lang.Double.TYPE => DoubleType
-    case c if c == classOf[Array[Byte]] => BinaryType
-    case _ => ObjectType(cls)
-  }
-
-  /**
-   * Returns an expression that can be used to deserialize a Spark SQL representation to an object
-   * of java bean `T` with a compatible schema.  The Spark SQL representation is located at ordinal
-   * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed
-   * using `UnresolvedExtractValue`.
-   */
-  def deserializerFor(beanClass: Class[_]): Expression = {
-    val typeToken = TypeToken.of(beanClass)
-    val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName)
-    val (dataType, nullable) = inferDataType(typeToken)
-
-    // Assumes we are deserializing the first column of a row.
-    deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType,
-      nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath))
-  }
-
-  private def deserializerFor(
-      typeToken: TypeToken[_],
-      path: Expression,
-      walkedTypePath: WalkedTypePath): Expression = {
-    typeToken.getRawType match {
-      case c if !inferExternalType(c).isInstanceOf[ObjectType] => path
-
-      case c if c == classOf[java.lang.Short] ||
-        c == classOf[java.lang.Integer] ||
-        c == classOf[java.lang.Long] ||
-        c == classOf[java.lang.Double] ||
-        c == classOf[java.lang.Float] ||
-        c == classOf[java.lang.Byte] ||
-        c == classOf[java.lang.Boolean] =>
-        createDeserializerForTypesSupportValueOf(path, c)
-
-      case c if c == classOf[java.time.LocalDate] =>
-        createDeserializerForLocalDate(path)
-
-      case c if c == classOf[java.sql.Date] =>
-        createDeserializerForSqlDate(path)
-
-      case c if c == classOf[java.time.Instant] =>
-        createDeserializerForInstant(path)
-
-      case c if c == classOf[java.sql.Timestamp] =>
-        createDeserializerForSqlTimestamp(path)
+  private class ImplementsGenericInterface(interface: Class[_]) {
+    assert(interface.isInterface)
+    assert(interface.getTypeParameters.nonEmpty)
 
-      case c if c == classOf[java.time.LocalDateTime] =>
-        createDeserializerForLocalDateTime(path)
-
-      case c if c == classOf[java.time.Duration] =>
-        createDeserializerForDuration(path)
-
-      case c if c == classOf[java.time.Period] =>
-        createDeserializerForPeriod(path)
-
-      case c if c == classOf[java.lang.String] =>
-        createDeserializerForString(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigDecimal] =>
-        createDeserializerForJavaBigDecimal(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigInteger] =>
-        createDeserializerForJavaBigInteger(path, returnNullable = true)
-
-      case c if c.isArray =>
-        val

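On the boxed-wrapper case in the hunk above: createDeserializerForTypesSupportValueOf keys on the fact that every listed wrapper class exposes a static valueOf factory, which the generated Catalyst expression invokes to box the value. A plain-reflection, REPL-style illustration of that property; this is only the property the case relies on, not the Catalyst helper itself:

```scala
// All wrapper classes matched by the removed case expose a static valueOf.
val boxedTypes = Seq(
  classOf[java.lang.Short], classOf[java.lang.Integer], classOf[java.lang.Long],
  classOf[java.lang.Double], classOf[java.lang.Float], classOf[java.lang.Byte],
  classOf[java.lang.Boolean])

boxedTypes.foreach { c =>
  // getMethod would throw NoSuchMethodException if the factory were absent.
  val valueOf = c.getMethod("valueOf", classOf[String])
  println(s"${c.getSimpleName}.valueOf(\"1\") = ${valueOf.invoke(null, "1")}")
}
```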
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]

2024-05-05 Thread via GitHub


viirya commented on code in PR #39615:
URL: https://github.com/apache/spark/pull/39615#discussion_r1590400148


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala:
##
@@ -166,317 +148,58 @@ object JavaTypeInference {
       .filter(_.getReadMethod != null)
   }
 
-  private def getJavaBeanReadableAndWritableProperties(
-      beanClass: Class[_]): Array[PropertyDescriptor] = {
-    getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null)
-  }
-
-  private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
-    val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]])
-    val iteratorType = iterableSuperType.resolveType(iteratorReturnType)
-    iteratorType.resolveType(nextReturnType)
-  }
-
-  private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]]
-    val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]])
-    val keyType = elementType(mapSuperType.resolveType(keySetReturnType))
-    val valueType = elementType(mapSuperType.resolveType(valuesReturnType))
-    keyType -> valueType
-  }
-
-  /**
-   * Returns the Spark SQL DataType for a given java class.  Where this is not an exact mapping
-   * to a native type, an ObjectType is returned.
-   *
-   * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type
-   * system.  As a result, ObjectType will be returned for things like boxed Integers.
-   */
-  private def inferExternalType(cls: Class[_]): DataType = cls match {
-    case c if c == java.lang.Boolean.TYPE => BooleanType
-    case c if c == java.lang.Byte.TYPE => ByteType
-    case c if c == java.lang.Short.TYPE => ShortType
-    case c if c == java.lang.Integer.TYPE => IntegerType
-    case c if c == java.lang.Long.TYPE => LongType
-    case c if c == java.lang.Float.TYPE => FloatType
-    case c if c == java.lang.Double.TYPE => DoubleType
-    case c if c == classOf[Array[Byte]] => BinaryType
-    case _ => ObjectType(cls)
-  }
-
-  /**
-   * Returns an expression that can be used to deserialize a Spark SQL representation to an object
-   * of java bean `T` with a compatible schema.  The Spark SQL representation is located at ordinal
-   * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed
-   * using `UnresolvedExtractValue`.
-   */
-  def deserializerFor(beanClass: Class[_]): Expression = {
-    val typeToken = TypeToken.of(beanClass)
-    val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName)
-    val (dataType, nullable) = inferDataType(typeToken)
-
-    // Assumes we are deserializing the first column of a row.
-    deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType,
-      nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath))
-  }
-
-  private def deserializerFor(
-      typeToken: TypeToken[_],
-      path: Expression,
-      walkedTypePath: WalkedTypePath): Expression = {
-    typeToken.getRawType match {
-      case c if !inferExternalType(c).isInstanceOf[ObjectType] => path
-
-      case c if c == classOf[java.lang.Short] ||
-        c == classOf[java.lang.Integer] ||
-        c == classOf[java.lang.Long] ||
-        c == classOf[java.lang.Double] ||
-        c == classOf[java.lang.Float] ||
-        c == classOf[java.lang.Byte] ||
-        c == classOf[java.lang.Boolean] =>
-        createDeserializerForTypesSupportValueOf(path, c)
-
-      case c if c == classOf[java.time.LocalDate] =>
-        createDeserializerForLocalDate(path)
-
-      case c if c == classOf[java.sql.Date] =>
-        createDeserializerForSqlDate(path)
-
-      case c if c == classOf[java.time.Instant] =>
-        createDeserializerForInstant(path)
-
-      case c if c == classOf[java.sql.Timestamp] =>
-        createDeserializerForSqlTimestamp(path)
+  private class ImplementsGenericInterface(interface: Class[_]) {
+    assert(interface.isInterface)
+    assert(interface.getTypeParameters.nonEmpty)
 
-      case c if c == classOf[java.time.LocalDateTime] =>
-        createDeserializerForLocalDateTime(path)
-
-      case c if c == classOf[java.time.Duration] =>
-        createDeserializerForDuration(path)
-
-      case c if c == classOf[java.time.Period] =>
-        createDeserializerForPeriod(path)
-
-      case c if c == classOf[java.lang.String] =>
-        createDeserializerForString(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigDecimal] =>
-        createDeserializerForJavaBigDecimal(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigInteger] =>
-        createDeserializerForJavaBigInteger(path, returnNullable = true)
-
-      case c if c.isArray =>
-        val

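The `+` lines in the hunk introduce ImplementsGenericInterface, which the encoder-based inference uses to recognize classes by the generic interfaces they implement. The PR's real implementation is not visible in this truncated hunk; the following is a hypothetical sketch, under the assumptions the two asserts set up, of what such an extractor can do, namely resolving the concrete type arguments a class binds on the interface:

```scala
import java.lang.reflect.{ParameterizedType, Type}
import com.google.common.reflect.TypeToken

// Hypothetical sketch, not the PR's actual implementation.
class ImplementsGenericInterface(interface: Class[_]) {
  assert(interface.isInterface)
  assert(interface.getTypeParameters.nonEmpty)

  // Returns the concrete type arguments `cls` binds on `interface`, if any.
  def unapply(cls: Class[_]): Option[Array[Type]] = {
    if (cls != null && interface.isAssignableFrom(cls)) {
      val supertype = TypeToken.of(cls).asInstanceOf[TypeToken[AnyRef]]
        .getSupertype(interface.asInstanceOf[Class[AnyRef]])
      supertype.getType match {
        case pt: ParameterizedType => Some(pt.getActualTypeArguments)
        case _ => None
      }
    } else {
      None
    }
  }
}

object ExtractorDemo extends App {
  val AsComparable = new ImplementsGenericInterface(classOf[Comparable[_]])
  classOf[java.lang.Integer] match {
    case AsComparable(Array(arg)) => println(arg)  // class java.lang.Integer
    case _ => println("no generic Comparable binding")
  }
}
```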

Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]

2024-05-05 Thread via GitHub


viirya commented on code in PR #39615:
URL: https://github.com/apache/spark/pull/39615#discussion_r1590399793


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala:
##
@@ -166,317 +148,58 @@ object JavaTypeInference {
       .filter(_.getReadMethod != null)
   }
 
-  private def getJavaBeanReadableAndWritableProperties(
-      beanClass: Class[_]): Array[PropertyDescriptor] = {
-    getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null)
-  }
-
-  private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
-    val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]])
-    val iteratorType = iterableSuperType.resolveType(iteratorReturnType)
-    iteratorType.resolveType(nextReturnType)
-  }
-
-  private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = {
-    val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]]
-    val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]])
-    val keyType = elementType(mapSuperType.resolveType(keySetReturnType))
-    val valueType = elementType(mapSuperType.resolveType(valuesReturnType))
-    keyType -> valueType
-  }
-
-  /**
-   * Returns the Spark SQL DataType for a given java class.  Where this is not an exact mapping
-   * to a native type, an ObjectType is returned.
-   *
-   * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type
-   * system.  As a result, ObjectType will be returned for things like boxed Integers.
-   */
-  private def inferExternalType(cls: Class[_]): DataType = cls match {
-    case c if c == java.lang.Boolean.TYPE => BooleanType
-    case c if c == java.lang.Byte.TYPE => ByteType
-    case c if c == java.lang.Short.TYPE => ShortType
-    case c if c == java.lang.Integer.TYPE => IntegerType
-    case c if c == java.lang.Long.TYPE => LongType
-    case c if c == java.lang.Float.TYPE => FloatType
-    case c if c == java.lang.Double.TYPE => DoubleType
-    case c if c == classOf[Array[Byte]] => BinaryType
-    case _ => ObjectType(cls)
-  }
-
-  /**
-   * Returns an expression that can be used to deserialize a Spark SQL representation to an object
-   * of java bean `T` with a compatible schema.  The Spark SQL representation is located at ordinal
-   * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed
-   * using `UnresolvedExtractValue`.
-   */
-  def deserializerFor(beanClass: Class[_]): Expression = {
-    val typeToken = TypeToken.of(beanClass)
-    val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName)
-    val (dataType, nullable) = inferDataType(typeToken)
-
-    // Assumes we are deserializing the first column of a row.
-    deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType,
-      nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath))
-  }
-
-  private def deserializerFor(
-      typeToken: TypeToken[_],
-      path: Expression,
-      walkedTypePath: WalkedTypePath): Expression = {
-    typeToken.getRawType match {
-      case c if !inferExternalType(c).isInstanceOf[ObjectType] => path
-
-      case c if c == classOf[java.lang.Short] ||
-        c == classOf[java.lang.Integer] ||
-        c == classOf[java.lang.Long] ||
-        c == classOf[java.lang.Double] ||
-        c == classOf[java.lang.Float] ||
-        c == classOf[java.lang.Byte] ||
-        c == classOf[java.lang.Boolean] =>
-        createDeserializerForTypesSupportValueOf(path, c)
-
-      case c if c == classOf[java.time.LocalDate] =>
-        createDeserializerForLocalDate(path)
-
-      case c if c == classOf[java.sql.Date] =>
-        createDeserializerForSqlDate(path)
-
-      case c if c == classOf[java.time.Instant] =>
-        createDeserializerForInstant(path)
-
-      case c if c == classOf[java.sql.Timestamp] =>
-        createDeserializerForSqlTimestamp(path)
+  private class ImplementsGenericInterface(interface: Class[_]) {
+    assert(interface.isInterface)
+    assert(interface.getTypeParameters.nonEmpty)
 
-      case c if c == classOf[java.time.LocalDateTime] =>
-        createDeserializerForLocalDateTime(path)
-
-      case c if c == classOf[java.time.Duration] =>
-        createDeserializerForDuration(path)
-
-      case c if c == classOf[java.time.Period] =>
-        createDeserializerForPeriod(path)
-
-      case c if c == classOf[java.lang.String] =>
-        createDeserializerForString(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigDecimal] =>
-        createDeserializerForJavaBigDecimal(path, returnNullable = true)
-
-      case c if c == classOf[java.math.BigInteger] =>
-        createDeserializerForJavaBigInteger(path, returnNullable = true)
-
-      case c if c.isArray =>
-        val

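Finally, the bean-property plumbing being moved in this PR ultimately backs the public Encoders.bean entry point. A hedged end-to-end sketch; the Person bean is hypothetical, and the exact schema rendering may differ by Spark version:

```scala
import org.apache.spark.sql.{Encoder, Encoders}

// Hypothetical bean: a getter/setter pair per field is exactly what the
// readable-and-writable property scan in the hunk above looks for.
class Person {
  private var name: String = _
  private var age: java.lang.Integer = _
  def getName: String = name
  def setName(v: String): Unit = { name = v }
  def getAge: java.lang.Integer = age
  def setAge(v: java.lang.Integer): Unit = { age = v }
}

object BeanSchemaDemo extends App {
  val enc: Encoder[Person] = Encoders.bean(classOf[Person])
  // Expect an int field `age` and a string field `name`, both nullable
  // since they are reference (boxed/object) types on the bean.
  println(enc.schema.treeString)
}
```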