This is an automated email from the ASF dual-hosted git repository. dbtsai pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 853f69a [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded` 853f69a is described below commit 853f69a4cf4bf138afb9075325ad175bc5f2d334 Author: DB Tsai <d_t...@apple.com> AuthorDate: Fri Mar 6 00:13:57 2020 +0000 [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded` ### What changes were proposed in this pull request? There are two implementations of quoteIfNeeded. One is in `org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quote` and the other is in `OrcFiltersBase.quoteAttributeNameIfNeeded`. This PR will consolidate them into one. ### Why are the changes needed? Simplify the codebase. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Existing UTs. Closes #27814 from dbtsai/SPARK-31058. Authored-by: DB Tsai <d_t...@apple.com> Signed-off-by: DB Tsai <d_t...@apple.com> (cherry picked from commit fe126a6a05b0de8db68c8f890e876ce96bf194ca) Signed-off-by: DB Tsai <d_t...@apple.com> --- .../spark/sql/connector/catalog/IdentifierImpl.java | 2 +- .../sql/connector/catalog/CatalogV2Implicits.scala | 10 +++++----- .../spark/sql/connector/catalog/V1Table.scala | 13 +++---------- .../catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../execution/datasources/orc/OrcFiltersBase.scala | 10 ---------- .../sql/execution/datasources/orc/OrcFilters.scala | 21 ++++++++++++--------- .../sql/execution/datasources/orc/OrcFilters.scala | 21 ++++++++++++--------- 7 files changed, 34 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java index a56007b..30596d9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java 
@@ -55,7 +55,7 @@ class IdentifierImpl implements Identifier { @Override public String toString() { return Stream.concat(Stream.of(namespace), Stream.of(name)) - .map(CatalogV2Implicits::quote) + .map(CatalogV2Implicits::quoteIfNeeded) .collect(Collectors.joining(".")); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index 3478af8..71bab62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits { } implicit class NamespaceHelper(namespace: Array[String]) { - def quoted: String = namespace.map(quote).mkString(".") + def quoted: String = namespace.map(quoteIfNeeded).mkString(".") } implicit class IdentifierHelper(ident: Identifier) { def quoted: String = { if (ident.namespace.nonEmpty) { - ident.namespace.map(quote).mkString(".") + "." + quote(ident.name) + ident.namespace.map(quoteIfNeeded).mkString(".") + "." 
+ quoteIfNeeded(ident.name) } else { - quote(ident.name) + quoteIfNeeded(ident.name) } } @@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits { s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.") } - def quoted: String = parts.map(quote).mkString(".") + def quoted: String = parts.map(quoteIfNeeded).mkString(".") } - def quote(part: String): String = { + def quoteIfNeeded(part: String): String = { if (part.contains(".") || part.contains("`")) { s"`${part.replace("`", "``")}`" } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 91e0c58..70fc968 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -24,6 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { def quoted: String = { identifier.database match { case Some(db) => - Seq(db, identifier.table).map(quote).mkString(".") + Seq(db, identifier.table).map(quoteIfNeeded).mkString(".") case _ => - quote(identifier.table) + quoteIfNeeded(identifier.table) } } - - private def quote(part: String): String = { - if (part.contains(".") || part.contains("`")) { - s"`${part.replace("`", "``")}`" - } else { - part - } - } } def catalogTable: CatalogTable = v1Table diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 7950b61..90a3fb8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -95,7 +95,7 @@ class ResolveSessionCatalog( .map(_._2.dataType) .getOrElse { throw new AnalysisException( - s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " + + s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " + s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala index 0b56587..e673309 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala @@ -36,16 +36,6 @@ trait OrcFiltersBase { } } - // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters - // in order to distinguish predicate pushdown for nested columns. - protected[sql] def quoteAttributeNameIfNeeded(name: String) : String = { - if (!name.contains("`") && name.contains(".")) { - s"`$name`" - } else { - name - } - } - /** * Return true if this is a searchable type in ORC. * Both CharType and VarcharType are cleaned at AstBuilder. 
diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 995c5ed..b9cbc48 100644 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -24,6 +24,7 @@ import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder import org.apache.orc.storage.serde2.io.HiveDecimalWritable import org.apache.spark.SparkException +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types._ @@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase { // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). + // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters + // in order to distinguish predicate pushdown for nested columns. 
expression match { case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end()) case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end()) case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end()) case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) 
Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end()) case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startAnd().isNull(quotedName, getType(attribute)).end()) case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startNot().isNull(quotedName, getType(attribute)).end()) case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute))) Some(builder.startAnd().in(quotedName, getType(attribute), castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 948ab44..6e9e592 100644 --- a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable import org.apache.spark.SparkException +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types._ @@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase { // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` // call is mandatory. 
ORC `SearchArgument` builder requires that all leaf predicates must be // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). + // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters + // in order to distinguish predicate pushdown for nested columns. expression match { case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end()) case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end()) case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end()) case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case 
GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end()) case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startAnd().isNull(quotedName, getType(attribute)).end()) case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startNot().isNull(quotedName, getType(attribute)).end()) case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute))) Some(builder.startAnd().in(quotedName, getType(attribute), castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org