viirya commented on a change in pull request #26013: [SPARK-29347][SQL] Add 
JSON serialization for external Rows
URL: https://github.com/apache/spark/pull/26013#discussion_r333711709
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
 ##########
 @@ -501,4 +513,88 @@ trait Row extends Serializable {
   private def getAnyValAs[T <: AnyVal](i: Int): T =
     if (isNullAt(i)) throw new NullPointerException(s"Value at index $i is 
null")
     else getAs[T](i)
+
+  /** The compact JSON representation of this row. */
+  def json: String = compact(jsonValue)
+
+  /** The pretty (i.e. indented) JSON representation of this row. */
+  def prettyJson: String = pretty(render(jsonValue))
+
+  /**
+   * JSON representation of the row.
+   *
+   * Note that this only supports the data types that are also supported by
+   * [[org.apache.spark.sql.catalyst.encoders.RowEncoder]].
+   *
+   * @return the JSON representation of the row.
+   */
+  private[sql] def jsonValue: JValue = {
+    require(schema != null, "JSON serialization requires a non-null schema.")
+
+    lazy val timeZone = TimeZone.getTimeZone(SQLConf.get.sessionLocalTimeZone)
+    lazy val dateFormatter = DateFormatter.apply(timeZone.toZoneId)
+    lazy val timestampFormatter = TimestampFormatter.apply(timeZone.toZoneId)
+
+    // Convert an iterator of values to a json array
+    def iteratorToJsonArray(iterator: Iterator[_], elementType: DataType): 
JArray = {
+      JArray(iterator.map(toJson(_, elementType)).toList)
+    }
+
+    // Convert a value to json.
+    def toJson(value: Any, dataType: DataType): JValue = (value, dataType) 
match {
+      case (null, _) => JNull
+      case (b: Boolean, _) => JBool(b)
+      case (b: Byte, _) => JLong(b)
+      case (s: Short, _) => JLong(s)
+      case (i: Int, _) => JLong(i)
+      case (l: Long, _) => JLong(l)
+      case (f: Float, _) => JDouble(f)
+      case (d: Double, _) => JDouble(d)
+      case (d: BigDecimal, _) => JDecimal(d)
+      case (d: java.math.BigDecimal, _) => JDecimal(d)
+      case (d: Decimal, _) => JDecimal(d.toBigDecimal)
+      case (s: String, _) => JString(s)
+      case (b: Array[Byte], BinaryType) =>
+        JString(Base64.getEncoder.encodeToString(b))
+      case (d: LocalDate, _) =>
+        JString(dateFormatter.format(DateTimeUtils.localDateToDays(d)))
+      case (d: Date, _) =>
+        JString(dateFormatter.format(DateTimeUtils.fromJavaDate(d)))
+      case (i: Instant, _) =>
+        JString(timestampFormatter.format(DateTimeUtils.instantToMicros(i)))
+      case (t: Timestamp, _) =>
+        JString(timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)))
+      case (a: Array[_], ArrayType(elementType, _)) =>
+        iteratorToJsonArray(a.iterator, elementType)
+      case (s: Seq[_], ArrayType(elementType, _)) =>
+        iteratorToJsonArray(s.iterator, elementType)
+      case (m: Map[String @unchecked, _], MapType(StringType, valueType, _)) =>
 
 Review comment:
   Can other primitive types like Int be good for this format too?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to