Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/20687#discussion_r173331455 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala --- @@ -331,4 +330,31 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { .analyze comparePlans(Optimizer execute rel, expected) } + + test("SPARK-23500: Simplify complex ops that aren't at the plan root") { + val structRel = relation + .select(GetStructField(CreateNamedStruct(Seq("att1", 'nullable_id)), 0, None) as "foo") + .groupBy($"foo")("1").analyze + val structExpected = relation + .select('nullable_id as "foo") + .groupBy($"foo")("1").analyze + comparePlans(Optimizer execute structRel, structExpected) + + // If nullable attributes aren't used in the 'expected' plans, the array and map test + // cases fail because array and map indexing can return null so the output attribute --- End diff -- This is because `AttributeReference` is used not only as a reference to an attribute from children, but also for the new attributes produced by leaf nodes, which have to carry the nullable info. It's not ideal, but it's too late to change now.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org