dbatomic commented on code in PR #45383: URL: https://github.com/apache/spark/pull/45383#discussion_r1519574637
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala: ########## @@ -138,21 +140,31 @@ object AnsiTypeCoercion extends TypeCoercionBase { @scala.annotation.tailrec private def findWiderTypeForString(dt1: DataType, dt2: DataType): Option[DataType] = { (dt1, dt2) match { - case (StringType, _: IntegralType) => Some(LongType) - case (StringType, _: FractionalType) => Some(DoubleType) - case (StringType, NullType) => Some(StringType) + case (_: StringType, _: IntegralType) => Some(LongType) + case (_: StringType, _: FractionalType) => Some(DoubleType) + case (st: StringType, NullType) => Some(st) // If a binary operation contains interval type and string, we can't decide which // interval type the string should be promoted as. There are many possible interval // types, such as year interval, month interval, day interval, hour interval, etc. - case (StringType, _: AnsiIntervalType) => None - case (StringType, a: AtomicType) => Some(a) - case (other, StringType) if other != StringType => findWiderTypeForString(StringType, other) + case (_: StringType, _: AnsiIntervalType) => None + case (_: StringType, a: AtomicType) => Some(a) + case (other, st: StringType) if !other.isInstanceOf[StringType] => + findWiderTypeForString(st, other) case _ => None } } - override def findWiderCommonType(types: Seq[DataType]): Option[DataType] = { - types.foldLeft[Option[DataType]](Some(NullType))((r, c) => + override def findWiderCommonType(exprs: Seq[Expression], + failOnIndeterminate: Boolean = false): Option[DataType] = { + (if (exprs.map(_.dataType).partition(hasStringType)._1.distinct.size > 1) { + val collationId = CollationTypeCasts.getOutputCollation(exprs, failOnIndeterminate) + exprs.map(e => + if (hasStringType(e.dataType)) { + castStringType(e.dataType, collationId) + e + } + else e) + } else exprs).map(_.dataType).foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { case Some(d) => findWiderTypeForTwo(d, c) Review Comment: I find this pretty weird. Why can't we just rely on `fold` + `findWiderTypeForTwo` logic? I think that type checks should remain `foldable` even with collation concept? i.e. we should always be able to determine output collation by just comparing two expressions and eventually folding to the result? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org