Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
viirya commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1593353161 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -val
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
hvanhovell commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1592553906 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
viirya commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1590399660 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -val
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
viirya commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1590400148 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -val
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
viirya commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1590400148 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -val
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
viirya commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1590399793 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -val
Re: [PR] [SPARK-42093][SQL] Move JavaTypeInference to AgnosticEncoders [spark]
viirya commented on code in PR #39615: URL: https://github.com/apache/spark/pull/39615#discussion_r1590399660 ## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ## @@ -166,317 +148,58 @@ object JavaTypeInference { .filter(_.getReadMethod != null) } - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { -getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] -val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) -val iteratorType = iterableSuperType.resolveType(iteratorReturnType) -iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { -val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] -val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) -val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) -val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) -keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { -case c if c == java.lang.Boolean.TYPE => BooleanType -case c if c == java.lang.Byte.TYPE => ByteType -case c if c == java.lang.Short.TYPE => ShortType -case c if c == java.lang.Integer.TYPE => IntegerType -case c if c == java.lang.Long.TYPE => LongType -case c if c == java.lang.Float.TYPE => FloatType -case c if c == java.lang.Double.TYPE => DoubleType -case c if c == classOf[Array[Byte]] => BinaryType -case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize a Spark SQL representation to an object - * of java bean `T` with a compatible schema. The Spark SQL representation is located at ordinal - * 0 of a row, i.e., `GetColumnByOrdinal(0, _)`. Nested classes will have their fields accessed - * using `UnresolvedExtractValue`. - */ - def deserializerFor(beanClass: Class[_]): Expression = { -val typeToken = TypeToken.of(beanClass) -val walkedTypePath = new WalkedTypePath().recordRoot(beanClass.getCanonicalName) -val (dataType, nullable) = inferDataType(typeToken) - -// Assumes we are deserializing the first column of a row. -deserializerForWithNullSafetyAndUpcast(GetColumnByOrdinal(0, dataType), dataType, - nullable = nullable, walkedTypePath, deserializerFor(typeToken, _, walkedTypePath)) - } - - private def deserializerFor( - typeToken: TypeToken[_], - path: Expression, - walkedTypePath: WalkedTypePath): Expression = { -typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => path - - case c if c == classOf[java.lang.Short] || -c == classOf[java.lang.Integer] || -c == classOf[java.lang.Long] || -c == classOf[java.lang.Double] || -c == classOf[java.lang.Float] || -c == classOf[java.lang.Byte] || -c == classOf[java.lang.Boolean] => -createDeserializerForTypesSupportValueOf(path, c) - - case c if c == classOf[java.time.LocalDate] => -createDeserializerForLocalDate(path) - - case c if c == classOf[java.sql.Date] => -createDeserializerForSqlDate(path) - - case c if c == classOf[java.time.Instant] => -createDeserializerForInstant(path) - - case c if c == classOf[java.sql.Timestamp] => -createDeserializerForSqlTimestamp(path) + private class ImplementsGenericInterface(interface: Class[_]) { +assert(interface.isInterface) +assert(interface.getTypeParameters.nonEmpty) - case c if c == classOf[java.time.LocalDateTime] => -createDeserializerForLocalDateTime(path) - - case c if c == classOf[java.time.Duration] => -createDeserializerForDuration(path) - - case c if c == classOf[java.time.Period] => -createDeserializerForPeriod(path) - - case c if c == classOf[java.lang.String] => -createDeserializerForString(path, returnNullable = true) - - case c if c == classOf[java.math.BigDecimal] => -createDeserializerForJavaBigDecimal(path, returnNullable = true) - - case c if c == classOf[java.math.BigInteger] => -createDeserializerForJavaBigInteger(path, returnNullable = true) - - case c if c.isArray => -val