Github user patrickmcgloin commented on a diff in the pull request: https://github.com/apache/spark/pull/21671#discussion_r199342210 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala --- @@ -317,16 +292,52 @@ class JacksonParser( row } + private def parseTimestamp(stringValue: String): Long = { + // This one will lose microseconds parts. + // See https://issues.apache.org/jira/browse/SPARK-10681.x + Try(options.timestampFormat.parse(stringValue).getTime * 1000L) + .getOrElse { + // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards + // compatibility. + DateTimeUtils.stringToTime(stringValue).getTime * 1000L + } + } + + private def parseDate(stringValue: String): Int = { + Try(DateTimeUtils.millisToDays(options.dateFormat.parse(stringValue).getTime)) + .orElse { + // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards + // compatibility. + Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(stringValue).getTime)) + } + .getOrElse { + // In Spark 1.5.0, we store the data as number of days since epoch in string. + // So, we just convert it to Int. + stringValue.toInt + } + } + /** * Parse an object as a Map, preserving all fields. */ private def convertMap( parser: JsonParser, + keyType: DataType, fieldConverter: ValueConverter): MapData = { - val keys = ArrayBuffer.empty[UTF8String] + val keys = ArrayBuffer.empty[Any] val values = ArrayBuffer.empty[Any] while (nextUntil(parser, JsonToken.END_OBJECT)) { - keys += UTF8String.fromString(parser.getCurrentName) + + val keyValue = keyType match { + case DateType => + parseDate(parser.getCurrentName) + case TimestampType => + parseTimestamp(parser.getCurrentName) + case _ => + UTF8String.fromString(parser.getCurrentName) + } + + keys += keyValue --- End diff -- Hi @MaxGekk , @HyukjinKwon , my thoughts are that most basic types will be supported by virtue of being converted to a string and back to their correct type. 
I could create unit tests to show which types work. What I think will not work are nested types (e.g. a Seq of Ints) or perhaps a BinaryType, as I can imagine that being converted to a string and back may not work well. I can add a generic way of implementing the code, as in the code snippet above, but I don't think other types will use it. The real use case here is when we have to do some formatting when reading and writing the key value.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org