cloud-fan commented on a change in pull request #29303: URL: https://github.com/apache/spark/pull/29303#discussion_r464293696
########## File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala ########## @@ -126,12 +124,52 @@ private[hive] class SparkGetColumnsOperation( HiveThriftServer2.eventManager.onStatementFinish(statementId) } + /** + * For numeric and datetime types, it returns the default size of its catalyst type + * For struct type, when its elements are fixed-size, the summation of all element sizes will be + * returned. + * For array, map, string, and binaries, the column size is variable, return null as unknown. + */ + private def getColumnSize(typ: DataType): Option[Int] = typ match { + case StringType | BinaryType | _: ArrayType | _: MapType => None + case StructType(fields) => + val sizeArr = fields.map(f => getColumnSize(f.dataType)) + if (sizeArr.contains(None)) { + None + } else { + Some(sizeArr.map(_.get).sum) + } + case other => Some(other.defaultSize) Review comment: nit: I think it's safer to list the types we know the size, instead of listing the types we don't know the size. I'd prefer ``` case dt @ (_: NumericType | DateType | TimestampType) => dt.defaultSize ... ``` ########## File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala ########## @@ -126,12 +124,52 @@ private[hive] class SparkGetColumnsOperation( HiveThriftServer2.eventManager.onStatementFinish(statementId) } + /** + * For numeric and datetime types, it returns the default size of its catalyst type + * For struct type, when its elements are fixed-size, the summation of all element sizes will be + * returned. + * For array, map, string, and binaries, the column size is variable, return null as unknown. 
+ */ + private def getColumnSize(typ: DataType): Option[Int] = typ match { + case StringType | BinaryType | _: ArrayType | _: MapType => None + case StructType(fields) => + val sizeArr = fields.map(f => getColumnSize(f.dataType)) + if (sizeArr.contains(None)) { + None + } else { + Some(sizeArr.map(_.get).sum) + } + case other => Some(other.defaultSize) Review comment: nit: I think it's safer to list the types we know the size, instead of listing the types we don't know the size. I'd prefer ``` case dt @ (_: NumericType | DateType | TimestampType) => dt.defaultSize ... ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org