brianrice2 commented on code in PR #655:
URL: https://github.com/apache/incubator-sedona/pull/655#discussion_r938385143
##########
sql/src/main/scala/org/apache/sedona/sql/utils/Adapter.scala:
##########
@@ -175,4 +256,80 @@ object Adapter {
}
else (Seq(geom), Seq())
}
+
+ /**
+ * Generate a Row with desired schema from a sequence of Geometry and String objects.
+ *
+ * @param stringRow Sequence of Geometry objects and String user data
+ * @param schema Desired output schema
+ * @return Row with the specified schema
+ */
+ private def castRowToSchema(stringRow: Seq[Any], schema: StructType): Row = {
+ val parsedRow = stringRow.zipWithIndex.map { case (value, idx) =>
+ val desiredDataType = schema(idx).dataType
+ // Don't convert geometry data, only user data
+ if (desiredDataType == GeometryUDT) value else parseString(value.toString, desiredDataType)
+ }
+
+ Row.fromSeq(parsedRow)
+ }
+
+ /**
+ * Parse a string to another data type based on the desired schema.
+ *
+ * @param data Data stored as string
+ * @param desiredType Desired SparkSQL data type
+ * @return Parsed value, or in the case of a struct column, an array of parsed values
+ */
+ private def parseString(data: String, desiredType: DataType): Any = {
+ // Spark needs to know how to serialize null values, so we have to provide the relevant class
+ if (data == "null") {
+ return desiredType match {
+ case _: ByteType => null.asInstanceOf[Byte]
+ case _: ShortType => null.asInstanceOf[Short]
+ case _: IntegerType => null.asInstanceOf[Integer]
+ case _: LongType => null.asInstanceOf[Long]
+ case _: FloatType => null.asInstanceOf[Float]
+ case _: DoubleType => null.asInstanceOf[Double]
+ case _: DateType => null.asInstanceOf[Date]
+ case _: TimestampType => null.asInstanceOf[Timestamp]
+ case _: BooleanType => null.asInstanceOf[Boolean]
+ case _: StringType => null.asInstanceOf[String]
+ }
+ }
+
+ desiredType match {
+ case _: ByteType => data.toByte
+ case _: ShortType => data.toShort
+ case _: IntegerType => data.toInt
+ case _: LongType => data.toLong
+ case _: FloatType => data.toFloat
+ case _: DoubleType => data.toDouble
+ case _: DateType => Date.valueOf(data)
+ case _: TimestampType => Timestamp.valueOf(data)
+ case _: BooleanType => data.toBoolean
+ case _: StringType => data
+ case _: StructType =>
+ val desiredStructSchema = desiredType.asInstanceOf[StructType]
+ new GenericRowWithSchema(parseStruct(data, desiredStructSchema), desiredStructSchema)
+ }
Review Comment:
I couldn't find an elegant way to perform the null-safe conversion, so I have these long `match` statements.
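
One possible alternative (a rough, untested sketch; the helper names `parseNonNull` and `parseNullable` are made up here, not taken from this PR): keep a single per-type `match` and handle the `"null"` sentinel once, assuming a plain `null` is acceptable for nullable fields when the Row is built. If the serialization issue mentioned above really requires a typed value, this simplification would not apply.

```scala
import java.sql.{Date, Timestamp}
import org.apache.spark.sql.types._

// Hypothetical sketch, not part of the PR.
object NullSafeParse {

  // One match per supported type; only called when the data is not the "null" sentinel.
  private def parseNonNull(data: String, desiredType: DataType): Any = desiredType match {
    case _: ByteType      => data.toByte
    case _: ShortType     => data.toShort
    case _: IntegerType   => data.toInt
    case _: LongType      => data.toLong
    case _: FloatType     => data.toFloat
    case _: DoubleType    => data.toDouble
    case _: DateType      => Date.valueOf(data)
    case _: TimestampType => Timestamp.valueOf(data)
    case _: BooleanType   => data.toBoolean
    case _: StringType    => data
    // StructType handling (parseStruct) is omitted from this sketch
    case other            => throw new IllegalArgumentException(s"Unsupported type: $other")
  }

  // Handle the "null" sentinel in one place; assumes a plain null is fine for nullable fields.
  def parseNullable(data: String, desiredType: DataType): Any =
    if (data == "null") null else parseNonNull(data, desiredType)
}
```

The trade-off is that returning a plain `null` relies on the target field being marked nullable in the schema, so it only shortens the code if that assumption holds.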