Github user marmbrus commented on a diff in the pull request:

    https://github.com/apache/spark/pull/999#discussion_r13520486
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala ---
    @@ -0,0 +1,371 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.json
    +
    +import org.apache.spark.sql.QueryTest
    +import org.apache.spark.sql.test.TestSQLContext._
    +import org.apache.spark.sql.catalyst.expressions.{ExprId, 
AttributeReference, Attribute}
    +import org.apache.spark.sql.catalyst.plans.generateSchemaTreeString
    +import org.apache.spark.sql.catalyst.types._
    +import org.apache.spark.sql.catalyst.util._
    +
    +class JsonSuite extends QueryTest {
    +  import TestJsonData._
    +  TestJsonData
    +
    +  /**
    +   * Since attribute references are given globally unique ids during analysis,
    +   * we must normalize them to check if two different queries are identical.
    +   */
    +  protected def normalizeExprIds(attributes: Seq[Attribute]) = {
    +    val minId = attributes.map(_.exprId.id).min
    +    attributes.map {
    +      case a: AttributeReference =>
    +        AttributeReference(a.name, a.dataType, a.nullable)(exprId = 
ExprId(a.exprId.id - minId))
    +    }
    +  }
    +
    +  protected def checkSchema(expected: Seq[Attribute], actual: 
Seq[Attribute]): Unit = {
    +    val normalizedExpected = normalizeExprIds(expected).toSeq
    +    val normalizedActual = normalizeExprIds(actual).toSeq
    +    if (normalizedExpected != normalizedActual) {
    +      fail(
    +        s"""
    +          |=== FAIL: Schemas do not match ===
    +          |${sideBySide(
    +              s"== Expected Schema ==\n" +
    +              generateSchemaTreeString(normalizedExpected),
    +              s"==  Actual Schema  ==\n" +
    +              generateSchemaTreeString(normalizedActual)).mkString("\n")}
    +        """.stripMargin)
    +    }
    +  }
    +
    +  test("Primitive field and type inferring") {
    +    val jsonSchemaRDD = jsonRDD(primitiveFieldAndType)
    +
    +    val expectedSchema =
    +      AttributeReference("bigInteger", DecimalType, true)() ::
    +      AttributeReference("boolean", BooleanType, true)() ::
    +      AttributeReference("double", DoubleType, true)() ::
    +      AttributeReference("integer", IntegerType, true)() ::
    +      AttributeReference("long", LongType, true)() ::
    +      AttributeReference("null", StringType, true)() ::
    +      AttributeReference("string", StringType, true)() :: Nil
    +
    +    checkSchema(expectedSchema, jsonSchemaRDD.logicalPlan.output)
    +
    +    jsonSchemaRDD.registerAsTable("jsonTable")
    +
    +    checkAnswer(
    +      sql("select * from jsonTable"),
    +      (BigDecimal("92233720368547758070"),
    +      true,
    +      1.7976931348623157E308,
    +      10,
    +      21474836470L,
    +      null,
    +      "this is a simple string.") :: Nil
    +    )
    +  }
    +
    +  test("Complex field and type inferring") {
    +    val jsonSchemaRDD = jsonRDD(complexFieldAndType)
    +
    +    val expectedSchema =
    +      AttributeReference("arrayOfArray1", 
ArrayType(ArrayType(StringType)), true)() ::
    +      AttributeReference("arrayOfArray2", 
ArrayType(ArrayType(DoubleType)), true)() ::
    +      AttributeReference("arrayOfBigInteger", ArrayType(DecimalType), 
true)() ::
    +      AttributeReference("arrayOfBoolean", ArrayType(BooleanType), true)() 
::
    +      AttributeReference("arrayOfDouble", ArrayType(DoubleType), true)() ::
    +      AttributeReference("arrayOfInteger", ArrayType(IntegerType), true)() 
::
    +      AttributeReference("arrayOfLong", ArrayType(LongType), true)() ::
    +      AttributeReference("arrayOfNull", ArrayType(StringType), true)() ::
    +      AttributeReference("arrayOfString", ArrayType(StringType), true)() ::
    +      AttributeReference("arrayOfStruct", ArrayType(
    +        StructType(StructField("field1", BooleanType, true) ::
    +                   StructField("field2", StringType, true) :: Nil)), 
true)() ::
    +      AttributeReference("struct", StructType(
    +        StructField("field1", BooleanType, true) ::
    +        StructField("field2", DecimalType, true) :: Nil), true)() ::
    +      AttributeReference("structWithArrayFields", StructType(
    +        StructField("field1", ArrayType(IntegerType), true) ::
    +        StructField("field2", ArrayType(StringType), true) :: Nil), 
true)() :: Nil
    +
    +    checkSchema(expectedSchema, jsonSchemaRDD.logicalPlan.output)
    +
    +    jsonSchemaRDD.registerAsTable("jsonTable")
    +
    +    // Access elements of a primitive array.
    +    checkAnswer(
    +      sql("select arrayOfString[0], arrayOfString[1], arrayOfString[2] 
from jsonTable"),
    +      ("str1", "str2", null) :: Nil
    +    )
    +
    +    // Access an array of null values.
    +    checkAnswer(
    +      sql("select arrayOfNull from jsonTable"),
    +      Seq(Seq(null, null, null, null)) :: Nil
    +    )
    +
    +    // Access elements of a BigInteger array (we use DecimalType internally).
    +    checkAnswer(
    +      sql("select arrayOfBigInteger[0], arrayOfBigInteger[1], 
arrayOfBigInteger[2] from jsonTable"),
    +      (BigDecimal("922337203685477580700"), 
BigDecimal("-922337203685477580800"), null) :: Nil
    +    )
    +
    +    // Access elements of an array of arrays.
    +    checkAnswer(
    +      sql("select arrayOfArray1[0], arrayOfArray1[1] from jsonTable"),
    +      (Seq("1", "2", "3"), Seq("str1", "str2")) :: Nil
    +    )
    +
    +    // Access elements of an array of arrays.
    +    checkAnswer(
    +      sql("select arrayOfArray2[0], arrayOfArray2[1] from jsonTable"),
    +      (Seq(1.0, 2.0, 3.0), Seq(1.1, 2.1, 3.1)) :: Nil
    +    )
    +
    +    // Access elements of an array inside a field with the type of ArrayType(ArrayType).
    +    checkAnswer(
    +      sql("select arrayOfArray1[1][1], arrayOfArray2[1][1] from 
jsonTable"),
    +      ("str2", 2.1) :: Nil
    +    )
    +
    +    // Access elements of an array of structs.
    +    checkAnswer(
    +      sql("select arrayOfStruct[0], arrayOfStruct[1], arrayOfStruct[2] 
from jsonTable"),
    +      (true :: "str1" :: Nil, false :: null :: Nil, null) :: Nil
    +    )
    +
    +    /*
    +    // Right now, "field1" and "field2" are treated as aliases. We should 
fix it.
    +    // TODO: Re-enable the following test.
    +    checkAnswer(
    +      sql("select arrayOfStruct[0].field1, arrayOfStruct[0].field2 from 
jsonTable"),
    +      (true, "str1") :: Nil
    +    )
    +    */
    +
    +    /*
    +    // Right now, the analyzer cannot resolve arrayOfStruct.field1 and 
arrayOfStruct.field2.
    +    // TODO: Re-enable the following test.
    +    // Getting all values of a specific field from an array of structs.
    +    checkAnswer(
    +      sql("select arrayOfStruct.field1, arrayOfStruct.field2 from 
jsonTable"),
    +      (Seq(true, false), Seq("str1", null)) :: Nil
    +    )
    +    */
    +
    +    // Access a struct and fields inside of it.
    +    checkAnswer(
    +      sql("select struct, struct.field1, struct.field2 from jsonTable"),
    +      (
    +        Seq(true, BigDecimal("92233720368547758070")),
    +        true,
    +        BigDecimal("92233720368547758070")) :: Nil
    +    )
    +
    +    // Access an array field of a struct.
    +    checkAnswer(
    +      sql("select structWithArrayFields.field1, 
structWithArrayFields.field2 from jsonTable"),
    +      (Seq(4, 5, 6), Seq("str1", "str2")) :: Nil
    +    )
    +
    +    // Access elements of an array field of a struct.
    +    checkAnswer(
    +      sql("select structWithArrayFields.field1[1], 
structWithArrayFields.field2[3] from jsonTable"),
    +      (5, null) :: Nil
    +    )
    +  }
    +
    +  test("Type conflict in primitive field values") {
    +    val jsonSchemaRDD = jsonRDD(primitiveFieldValueTypeConflict)
    +
    +    val expectedSchema =
    +      AttributeReference("num_bool", StringType, true)() ::
    +      AttributeReference("num_num_1", LongType, true)() ::
    +      AttributeReference("num_num_2", DecimalType, true)() ::
    +      AttributeReference("num_num_3", DoubleType, true)() ::
    +      AttributeReference("num_str", StringType, true)() ::
    +      AttributeReference("str_bool", StringType, true)() :: Nil
    +
    +    checkSchema(expectedSchema, jsonSchemaRDD.logicalPlan.output)
    +
    +    jsonSchemaRDD.registerAsTable("jsonTable")
    +
    +    checkAnswer(
    +      sql("select * from jsonTable"),
    +      ("true", 11L, null, 1.1, "13.1", "str1") ::
    +      ("12", null, BigDecimal("21474836470.9"), null, null, "true") ::
    +      ("false", 21474836470L, BigDecimal("92233720368547758070"), 100, 
"str1", "false") ::
    +      (null, 21474836570L, BigDecimal(1.1), 21474836470L, 
"92233720368547758070", null) :: Nil
    +    )
    +
    +    // Number and Boolean conflict: resolve the type as number in this query.
    +    checkAnswer(
    +      sql("select num_bool - 10 from jsonTable where num_bool > 11"),
    +      2
    +    )
    +
    +    /*
    +    // Right now, the analyzer does not insert a Cast for num_bool.
    --- End diff --
    
    What do you mean here?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to