[GitHub] spark pull request #14083: [SPARK-16406][SQL] Improve performance of Logical...

cloud-fan Sun, 22 Apr 2018 19:33:29 -0700

Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/14083#discussion_r183265525
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
 ---
    @@ -138,6 +140,88 @@ package object expressions  {
         def indexOf(exprId: ExprId): Int = {
           Option(exprIdToOrdinal.get(exprId)).getOrElse(-1)
         }
    +
    +    private def unique[T](m: Map[T, Seq[Attribute]]): Map[T, 
Seq[Attribute]] = {
    +      m.mapValues(_.distinct).map(identity)
    +    }
    +
    +    /** Map to use for direct case insensitive attribute lookups. */
    +    @transient private lazy val direct: Map[String, Seq[Attribute]] = {
    +      unique(attrs.groupBy(_.name.toLowerCase))
    +    }
    +
    +    /** Map to use for qualified case insensitive attribute lookups. */
    +    @transient private val qualified: Map[(String, String), 
Seq[Attribute]] = {
    +      val grouped = attrs.filter(_.qualifier.isDefined).groupBy { a =>
    +        (a.qualifier.get.toLowerCase, a.name.toLowerCase)
    +      }
    +      unique(grouped)
    +    }
    +
    +    /** Perform attribute resolution given a name and a resolver. */
    +    def resolve(nameParts: Seq[String], resolver: Resolver): 
Option[NamedExpression] = {
    +      // Collect matching attributes given a name and a lookup.
    +      def collectMatches(name: String, candidates: 
Option[Seq[Attribute]]): Seq[Attribute] = {
    +        candidates.toSeq.flatMap(_.collect {
    +          case a if resolver(a.name, name) => a.withName(name)
    +        })
    +      }
    +
    +      // Find matches for the given name assuming that the 1st part is a 
qualifier (i.e. table name,
    +      // alias, or subquery alias) and the 2nd part is the actual name. 
This returns a tuple of
    +      // matched attributes and a list of parts that are to be resolved.
    +      //
    +      // For example, consider an example where "a" is the table name, "b" 
is the column name,
    +      // and "c" is the struct field name, i.e. "a.b.c". In this case, 
Attribute will be "a.b",
    +      // and the second element will be List("c").
    +      val matches = nameParts match {
    +        case qualifier +: name +: nestedFields =>
    +          val key = (qualifier.toLowerCase, name.toLowerCase)
    +          val attributes = collectMatches(name, qualified.get(key)).filter 
{ a =>
    +            resolver(qualifier, a.qualifier.get)
    +          }
    +          (attributes, nestedFields)
    +        case all =>
    +          (Nil, all)
    +      }
    +
    +      // If none of attributes match `table.column` pattern, we try to 
resolve it as a column.
    +      val (candidates, nestedFields) = matches match {
    +        case (Seq(), _) =>
    +          val name = nameParts.head
    +          val attributes = collectMatches(name, 
direct.get(name.toLowerCase))
    +          (attributes, nameParts.tail)
    +        case _ => matches
    +      }
    +
    +      def name = UnresolvedAttribute(nameParts).name
    +      candidates match {
    +        case Seq(a) if nestedFields.nonEmpty =>
    +          // One match, but we also need to extract the requested nested 
field.
    +          // The foldLeft adds ExtractValues for every remaining parts of 
the identifier,
    +          // and aliased it with the last part of the name.
    +          // For example, consider "a.b.c", where "a" is resolved to an 
existing attribute.
    +          // Then this will add ExtractValue("c", ExtractValue("b", a)), 
and alias the final
    +          // expression as "c".
    +          val fieldExprs = nestedFields.foldLeft(a: Expression) { (e, 
name) =>
    +            ExtractValue(e, Literal(name), resolver)
    +          }
    +          Some(Alias(fieldExprs, nestedFields.last)())
    +
    +        case Seq(a) =>
    +          // One match, no nested fields, use it.
    +          Some(a)
    +
    +        case Seq() =>
    +          // No matches.
    +          None
    +
    +        case ambiguousReferences =>
    +          // More than one match.
    +          val referenceNames = ambiguousReferences.mkString(", ")
    --- End diff --
    
    To pass the test, we should follow the previous code: 
`ambiguousReferences.map(_._1.qualifiedName).mkString(", ")`.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #14083: [SPARK-16406][SQL] Improve performance of Logical...

Reply via email to