Github user dilipbiswal commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22619#discussion_r222198784

    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala ---
    @@ -172,51 +172,35 @@ private[csv] object CSVInferSchema {
           StringType
         }

    -  private val numericPrecedence: IndexedSeq[DataType] = TypeCoercion.numericPrecedence
    -
       /**
    -   * Copied from internal Spark api
    -   * [[org.apache.spark.sql.catalyst.analysis.TypeCoercion]]
    +   * Returns the common data type given two input data types so that the return type
    +   * is compatible with both input data types.
        */
    -  val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
    -    case (t1, t2) if t1 == t2 => Some(t1)
    -    case (NullType, t1) => Some(t1)
    -    case (t1, NullType) => Some(t1)
    -    case (StringType, t2) => Some(StringType)
    -    case (t1, StringType) => Some(StringType)
    -
    -    // Promote numeric types to the highest of the two and all numeric types to unlimited decimal
    -    case (t1, t2) if Seq(t1, t2).forall(numericPrecedence.contains) =>
    -      val index = numericPrecedence.lastIndexWhere(t => t == t1 || t == t2)
    -      Some(numericPrecedence(index))
    -
    -    // These two cases below deal with when `DecimalType` is larger than `IntegralType`.
    -    case (t1: IntegralType, t2: DecimalType) if t2.isWiderThan(t1) =>
    -      Some(t2)
    -    case (t1: DecimalType, t2: IntegralType) if t1.isWiderThan(t2) =>
    -      Some(t1)
    -
    -    // These two cases below deal with when `IntegralType` is larger than `DecimalType`.
    -    case (t1: IntegralType, t2: DecimalType) =>
    -      findTightestCommonType(DecimalType.forType(t1), t2)
    -    case (t1: DecimalType, t2: IntegralType) =>
    -      findTightestCommonType(t1, DecimalType.forType(t2))
    -
    -    // Double support larger range than fixed decimal, DecimalType.Maximum should be enough
    -    // in most case, also have better precision.
    -    case (DoubleType, _: DecimalType) | (_: DecimalType, DoubleType) =>
    -      Some(DoubleType)
    -
    -    case (t1: DecimalType, t2: DecimalType) =>
    -      val scale = math.max(t1.scale, t2.scale)
    -      val range = math.max(t1.precision - t1.scale, t2.precision - t2.scale)
    -      if (range + scale > 38) {
    -        // DecimalType can't support precision > 38
    -        Some(DoubleType)
    -      } else {
    -        Some(DecimalType(range + scale, scale))
    +  def compatibleType(t1: DataType, t2: DataType): Option[DataType] = {
    --- End diff --

    @viirya I kept the same name used in JsonInferSchema. Should I change that as well, or only this one?
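As background for the diff above, the subtlest branch of the removed findTightestCommonType is the decimal-vs-decimal case. Below is a minimal, self-contained Scala sketch of that widening rule, assuming only Spark's public types and the DecimalType precision cap of 38; DecimalWideningSketch and widenDecimals are hypothetical names used for illustration, not code from this PR.

    import org.apache.spark.sql.types.{DataType, DecimalType, DoubleType}

    object DecimalWideningSketch {
      // Hypothetical helper mirroring the decimal case in the removed code:
      // keep the larger scale and the larger integer-digit range of the two types.
      def widenDecimals(t1: DecimalType, t2: DecimalType): DataType = {
        val scale = math.max(t1.scale, t2.scale)
        val range = math.max(t1.precision - t1.scale, t2.precision - t2.scale)
        if (range + scale > 38) {
          // A DecimalType cannot represent precision > 38, so fall back to Double.
          DoubleType
        } else {
          DecimalType(range + scale, scale)
        }
      }
    }

For example, widenDecimals(DecimalType(10, 2), DecimalType(12, 4)) returns DecimalType(12, 4): the result keeps the larger scale (4) and the larger integer-digit range (8), so neither input loses digits on either side of the decimal point.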