Github user marmbrus commented on a diff in the pull request:

    https://github.com/apache/spark/pull/2563#discussion_r18310445
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala 
---
    @@ -19,71 +19,127 @@ package org.apache.spark.sql.catalyst.types
     
     import java.sql.Timestamp
     
    -import scala.math.Numeric.{FloatAsIfIntegral, BigDecimalAsIfIntegral, 
DoubleAsIfIntegral}
    +import scala.math.Numeric.{BigDecimalAsIfIntegral, DoubleAsIfIntegral, 
FloatAsIfIntegral}
     import scala.reflect.ClassTag
    -import scala.reflect.runtime.universe.{typeTag, TypeTag, runtimeMirror}
    +import scala.reflect.runtime.universe.{TypeTag, runtimeMirror, typeTag}
     import scala.util.parsing.combinator.RegexParsers
     
    +import org.json4s.JsonAST.JValue
    +import org.json4s._
    +import org.json4s.JsonDSL._
    +import org.json4s.jackson.JsonMethods._
    +
     import org.apache.spark.sql.catalyst.ScalaReflectionLock
     import org.apache.spark.sql.catalyst.expressions.{Attribute, 
AttributeReference, Expression}
     import org.apache.spark.util.Utils
     
    -/**
    - * Utility functions for working with DataTypes.
    - */
    -object DataType extends RegexParsers {
    -  protected lazy val primitiveType: Parser[DataType] =
    -    "StringType" ^^^ StringType |
    -    "FloatType" ^^^ FloatType |
    -    "IntegerType" ^^^ IntegerType |
    -    "ByteType" ^^^ ByteType |
    -    "ShortType" ^^^ ShortType |
    -    "DoubleType" ^^^ DoubleType |
    -    "LongType" ^^^ LongType |
    -    "BinaryType" ^^^ BinaryType |
    -    "BooleanType" ^^^ BooleanType |
    -    "DecimalType" ^^^ DecimalType |
    -    "TimestampType" ^^^ TimestampType
    -
    -  protected lazy val arrayType: Parser[DataType] =
    -    "ArrayType" ~> "(" ~> dataType ~ "," ~ boolVal <~ ")" ^^ {
    -      case tpe ~ _ ~ containsNull => ArrayType(tpe, containsNull)
    -    }
     
    -  protected lazy val mapType: Parser[DataType] =
    -    "MapType" ~> "(" ~> dataType ~ "," ~ dataType ~ "," ~ boolVal <~ ")" 
^^ {
    -      case t1 ~ _ ~ t2 ~ _ ~ valueContainsNull => MapType(t1, t2, 
valueContainsNull)
    -    }
    +object DataType {
    +  def fromJson(json: String): DataType = parseDataType(parse(json))
     
    -  protected lazy val structField: Parser[StructField] =
    -    ("StructField(" ~> "[a-zA-Z0-9_]*".r) ~ ("," ~> dataType) ~ ("," ~> 
boolVal <~ ")") ^^ {
    -      case name ~ tpe ~ nullable  =>
    -          StructField(name, tpe, nullable = nullable)
    +  private object JSortedObject {
    +    def unapplySeq(value: JValue): Option[List[(String, JValue)]] = value 
match {
    +      case JObject(seq) => Some(seq.toList.sortBy(_._1))
    +      case _ => None
         }
    +  }
     
    -  protected lazy val boolVal: Parser[Boolean] =
    -    "true" ^^^ true |
    -    "false" ^^^ false
    +  private def parseDataType(asJValue: JValue): DataType = asJValue match {
    +    case JString("boolean") => BooleanType
    +    case JString("byte") => ByteType
    +    case JString("short") => ShortType
    +    case JString("integer") => IntegerType
    +    case JString("long") => LongType
    +    case JString("float") => FloatType
    +    case JString("double") => DoubleType
    +    case JString("decimal") => DecimalType
    +    case JString("string") => StringType
    +    case JString("binary") => BinaryType
    +    case JString("timestamp") => TimestampType
    +    case JString("null") => NullType
    +    case JObject(List(("array", JSortedObject(
    +        ("containsNull", JBool(n)), ("type", t: JValue))))) =>
    +      ArrayType(parseDataType(t), n)
    +    case JObject(List(("struct", JObject(List(("fields", 
JArray(fields))))))) =>
    +      StructType(fields.map(parseStructField))
    +    case JObject(List(("map", JSortedObject(
    +        ("key", k: JValue), ("value", v: JValue), ("valueContainsNull", 
JBool(n)))))) =>
    +      MapType(parseDataType(k), parseDataType(v), n)
    +  }
     
    -  protected lazy val structType: Parser[DataType] =
    -    "StructType\\([A-zA-z]*\\(".r ~> repsep(structField, ",") <~ "))" ^^ {
    -      case fields => new StructType(fields)
    -    }
    +  private def parseStructField(asJValue: JValue): StructField = asJValue 
match {
    +    case JObject(Seq(("field", JSortedObject(
    +        ("name", JString(name)),
    +        ("nullable", JBool(nullable)),
    +        ("type", dataType: JValue))))) =>
    +      StructField(name, parseDataType(dataType), nullable)
    +  }
     
    -  protected lazy val dataType: Parser[DataType] =
    -    arrayType |
    -      mapType |
    -      structType |
    -      primitiveType
    +  @deprecated("Use DataType.fromJson instead")
    +  def fromCaseClassString(string: String): DataType = 
CaseClassStringParser(string)
     
       /**
    -   * Parses a string representation of a DataType.
    -   *
    -   * TODO: Generate parser as pickler...
    +   * Utility functions for working with DataTypes.
    --- End diff --
    
    I think this comment is in the wrong place.  We should probably note that 
this parser is deprecated and exists only for backwards compatibility.  We 
might even log a warning when it is used, so that we can eventually remove it.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to