MaxGekk commented on a change in pull request #31549: URL: https://github.com/apache/spark/pull/31549#discussion_r578640897
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala ########## @@ -486,34 +485,49 @@ object PartitioningUtils { val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(zoneId.getId)).eval() // Disallow TimestampType if the cast returned null require(timestampValue != null) - Literal.create(timestampValue, TimestampType) + TimestampType } - if (typeInference) { + val dataType = if (typeInference) { // First tries integral types - Try(Literal.create(Integer.parseInt(raw), IntegerType)) - .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) + Try({ Integer.parseInt(raw); IntegerType }) + .orElse(Try { JLong.parseLong(raw); LongType }) .orElse(decimalTry) // Then falls back to fractional types - .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) + .orElse(Try { JDouble.parseDouble(raw); DoubleType }) // Then falls back to date/timestamp types .orElse(timestampTry) .orElse(dateTry) // Then falls back to string .getOrElse { - if (raw == DEFAULT_PARTITION_NAME) { - Literal.create(null, NullType) - } else { - Literal.create(unescapePathName(raw), StringType) - } + if (raw == DEFAULT_PARTITION_NAME) NullType else StringType } } else { - if (raw == DEFAULT_PARTITION_NAME) { - Literal.create(null, NullType) - } else { - Literal.create(unescapePathName(raw), StringType) - } + if (raw == DEFAULT_PARTITION_NAME) NullType else StringType } + dataType + } + + def castPartValueToDesiredType( + desiredType: DataType, + value: String, + zoneId: ZoneId): Any = desiredType match { + case _ if value == DEFAULT_PARTITION_NAME => null + case NullType => null + case StringType => UTF8String.fromString(unescapePathName(value)) + case IntegerType => Integer.parseInt(value) + case LongType => JLong.parseLong(value) + case DoubleType => JDouble.parseDouble(value) + case _: DecimalType => Literal(new JBigDecimal(value)).value + case DateType => + Cast(Literal(value), DateType, 
Some(zoneId.getId)).eval() + case TimestampType => + Try { + Cast(Literal(unescapePathName(value)), TimestampType, Some(zoneId.getId)).eval() Review comment: Every time we get a value from the file system as part of a file name. Since you added the method in https://github.com/apache/spark/pull/15797/files#diff-74a21c0670bb349140a57b99a473d036060a6bcd20d22674346cc77213cebac1R76, you should know better ;-) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org