This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 00943be [SPARK-30829][SQL] Define LegacyBehaviorPolicy enumeration as the common value for result change configs 00943be is described below commit 00943be81afbca6be13e1e72b24536cd98a788d6 Author: Yuanjian Li <xyliyuanj...@gmail.com> AuthorDate: Tue Feb 18 00:52:05 2020 +0800 [SPARK-30829][SQL] Define LegacyBehaviorPolicy enumeration as the common value for result change configs ### What changes were proposed in this pull request? Define a new enumeration `LegacyBehaviorPolicy` in SQLConf, it will be used as the common value for result change configs. ### Why are the changes needed? During API auditing for the 3.0 release, we found several new approaches that will change the results silently. For these features, we need a common three-value config. ### Does this PR introduce any user-facing change? Yes, original config `spark.sql.legacy.ctePrecedence.enabled` change to `spark.sql.legacy.ctePrecedencePolicy`. ### How was this patch tested? Existing UT. Closes #27579 from xuanyuanking/SPARK-30829. Authored-by: Yuanjian Li <xyliyuanj...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit e4a541b278b47d375252b2d0a8482c053ec3dd3e) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/analysis/CTESubstitution.scala | 23 +++++++++++----------- .../org/apache/spark/sql/internal/SQLConf.scala | 19 ++++++++++++------ .../test/resources/sql-tests/inputs/cte-legacy.sql | 2 +- .../resources/sql-tests/inputs/cte-nonlegacy.sql | 2 +- .../resources/sql-tests/results/cte-legacy.sql.out | 4 ++-- .../test/resources/sql-tests/results/cte.sql.out | 12 +++++------ 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index d2be15d..c8078e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -22,21 +22,21 @@ import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, With} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.LEGACY_CTE_PRECEDENCE_ENABLED +import org.apache.spark.sql.internal.SQLConf.{LEGACY_CTE_PRECEDENCE_POLICY, LegacyBehaviorPolicy} /** * Analyze WITH nodes and substitute child plan with CTE definitions. */ object CTESubstitution extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - val isLegacy = SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_ENABLED) - if (isLegacy.isEmpty) { - assertNoNameConflictsInCTE(plan, inTraverse = false) - traverseAndSubstituteCTE(plan, inTraverse = false) - } else if (isLegacy.get) { - legacyTraverseAndSubstituteCTE(plan) - } else { - traverseAndSubstituteCTE(plan, inTraverse = false) + LegacyBehaviorPolicy.withName(SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { + case LegacyBehaviorPolicy.EXCEPTION => + assertNoNameConflictsInCTE(plan, inTraverse = false) + traverseAndSubstituteCTE(plan, inTraverse = false) + case LegacyBehaviorPolicy.LEGACY => + legacyTraverseAndSubstituteCTE(plan) + case LegacyBehaviorPolicy.CORRECTED => + traverseAndSubstituteCTE(plan, inTraverse = false) } } @@ -54,8 +54,9 @@ object CTESubstitution extends Rule[LogicalPlan] { case (cteName, _) => if (cteNames.contains(cteName)) { throw new AnalysisException(s"Name $cteName is ambiguous in nested CTE. " + - s"Please set ${LEGACY_CTE_PRECEDENCE_ENABLED.key} to false so that name defined " + - "in inner CTE takes precedence. See more details in SPARK-28228.") + s"Please set ${LEGACY_CTE_PRECEDENCE_POLICY.key} to CORRECTED so that name " + + "defined in inner CTE takes precedence. If set it to LEGACY, outer CTE " + + "definitions will take precedence. See more details in SPARK-28228.") } else { cteName } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c336f87..ac3169d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2106,13 +2106,20 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_CTE_PRECEDENCE_ENABLED = buildConf("spark.sql.legacy.ctePrecedence.enabled") + object LegacyBehaviorPolicy extends Enumeration { + val EXCEPTION, LEGACY, CORRECTED = Value + } + + val LEGACY_CTE_PRECEDENCE_POLICY = buildConf("spark.sql.legacy.ctePrecedencePolicy") .internal() - .doc("When true, outer CTE definitions takes precedence over inner definitions. If set to " + - "false, inner CTE definitions take precedence. The default value is empty, " + - "AnalysisException is thrown while name conflict is detected in nested CTE.") - .booleanConf - .createOptional + .doc("When LEGACY, outer CTE definitions takes precedence over inner definitions. If set to " + + "CORRECTED, inner CTE definitions take precedence. The default value is EXCEPTION, " + + "AnalysisException is thrown while name conflict is detected in nested CTE. This config " + + "will be removed in future versions and CORRECTED will be the only behavior.") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) val LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC = buildConf("spark.sql.legacy.arrayExistsFollowsThreeValuedLogic") diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql index 2f2606d..d8754d3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql @@ -2,7 +2,7 @@ create temporary view t as select * from values 0, 1, 2 as t(id); create temporary view t2 as select * from values 0, 1 as t(id); -- CTE legacy substitution -SET spark.sql.legacy.ctePrecedence.enabled=true; +SET spark.sql.legacy.ctePrecedencePolicy=legacy; -- CTE in CTE definition WITH t as ( diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql index b711bf3..8b9ce89 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql @@ -1,2 +1,2 @@ ---SET spark.sql.legacy.ctePrecedence.enabled = false +--SET spark.sql.legacy.ctePrecedencePolicy = corrected --IMPORT cte.sql diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index a9709c4..a20ae55 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -19,11 +19,11 @@ struct<> -- !query -SET spark.sql.legacy.ctePrecedence.enabled=true +SET spark.sql.legacy.ctePrecedencePolicy=legacy -- !query schema struct<key:string,value:string> -- !query output -spark.sql.legacy.ctePrecedence.enabled true +spark.sql.legacy.ctePrecedencePolicy legacy -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index 1d50aa8..6486fba 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -207,7 +207,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -226,7 +226,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -245,7 +245,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -299,7 +299,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -314,7 +314,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query @@ -330,7 +330,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; -- !query --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org