This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 00943be  [SPARK-30829][SQL] Define LegacyBehaviorPolicy enumeration as 
the common value for result change configs
00943be is described below

commit 00943be81afbca6be13e1e72b24536cd98a788d6
Author: Yuanjian Li <xyliyuanj...@gmail.com>
AuthorDate: Tue Feb 18 00:52:05 2020 +0800

    [SPARK-30829][SQL] Define LegacyBehaviorPolicy enumeration as the common 
value for result change configs
    
    ### What changes were proposed in this pull request?
    Define a new enumeration `LegacyBehaviorPolicy` in SQLConf, it will be used 
as the common value for result change configs.
    
    ### Why are the changes needed?
    During API auditing for the 3.0 release, we found several new approaches 
that will change the results silently. For these features, we need a common 
three-value config.
    
    ### Does this PR introduce any user-facing change?
    Yes, original config `spark.sql.legacy.ctePrecedence.enabled` change to 
`spark.sql.legacy.ctePrecedencePolicy`.
    
    ### How was this patch tested?
    Existing UT.
    
    Closes #27579 from xuanyuanking/SPARK-30829.
    
    Authored-by: Yuanjian Li <xyliyuanj...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit e4a541b278b47d375252b2d0a8482c053ec3dd3e)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/catalyst/analysis/CTESubstitution.scala    | 23 +++++++++++-----------
 .../org/apache/spark/sql/internal/SQLConf.scala    | 19 ++++++++++++------
 .../test/resources/sql-tests/inputs/cte-legacy.sql |  2 +-
 .../resources/sql-tests/inputs/cte-nonlegacy.sql   |  2 +-
 .../resources/sql-tests/results/cte-legacy.sql.out |  4 ++--
 .../test/resources/sql-tests/results/cte.sql.out   | 12 +++++------
 6 files changed, 35 insertions(+), 27 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
index d2be15d..c8078e1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
@@ -22,21 +22,21 @@ import 
org.apache.spark.sql.catalyst.expressions.SubqueryExpression
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, With}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.SQLConf.LEGACY_CTE_PRECEDENCE_ENABLED
+import org.apache.spark.sql.internal.SQLConf.{LEGACY_CTE_PRECEDENCE_POLICY, 
LegacyBehaviorPolicy}
 
 /**
  * Analyze WITH nodes and substitute child plan with CTE definitions.
  */
 object CTESubstitution extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = {
-    val isLegacy = SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_ENABLED)
-    if (isLegacy.isEmpty) {
-      assertNoNameConflictsInCTE(plan, inTraverse = false)
-      traverseAndSubstituteCTE(plan, inTraverse = false)
-    } else if (isLegacy.get) {
-      legacyTraverseAndSubstituteCTE(plan)
-    } else {
-      traverseAndSubstituteCTE(plan, inTraverse = false)
+    
LegacyBehaviorPolicy.withName(SQLConf.get.getConf(LEGACY_CTE_PRECEDENCE_POLICY))
 match {
+      case LegacyBehaviorPolicy.EXCEPTION =>
+        assertNoNameConflictsInCTE(plan, inTraverse = false)
+        traverseAndSubstituteCTE(plan, inTraverse = false)
+      case LegacyBehaviorPolicy.LEGACY =>
+        legacyTraverseAndSubstituteCTE(plan)
+      case LegacyBehaviorPolicy.CORRECTED =>
+        traverseAndSubstituteCTE(plan, inTraverse = false)
     }
   }
 
@@ -54,8 +54,9 @@ object CTESubstitution extends Rule[LogicalPlan] {
           case (cteName, _) =>
             if (cteNames.contains(cteName)) {
               throw new AnalysisException(s"Name $cteName is ambiguous in 
nested CTE. " +
-                s"Please set ${LEGACY_CTE_PRECEDENCE_ENABLED.key} to false so 
that name defined " +
-                "in inner CTE takes precedence. See more details in 
SPARK-28228.")
+                s"Please set ${LEGACY_CTE_PRECEDENCE_POLICY.key} to CORRECTED 
so that name " +
+                "defined in inner CTE takes precedence. If set it to LEGACY, 
outer CTE " +
+                "definitions will take precedence. See more details in 
SPARK-28228.")
             } else {
               cteName
             }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index c336f87..ac3169d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2106,13 +2106,20 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
-  val LEGACY_CTE_PRECEDENCE_ENABLED = 
buildConf("spark.sql.legacy.ctePrecedence.enabled")
+  object LegacyBehaviorPolicy extends Enumeration {
+    val EXCEPTION, LEGACY, CORRECTED = Value
+  }
+
+  val LEGACY_CTE_PRECEDENCE_POLICY = 
buildConf("spark.sql.legacy.ctePrecedencePolicy")
     .internal()
-    .doc("When true, outer CTE definitions takes precedence over inner 
definitions. If set to " +
-      "false, inner CTE definitions take precedence. The default value is 
empty, " +
-      "AnalysisException is thrown while name conflict is detected in nested 
CTE.")
-    .booleanConf
-    .createOptional
+    .doc("When LEGACY, outer CTE definitions takes precedence over inner 
definitions. If set to " +
+      "CORRECTED, inner CTE definitions take precedence. The default value is 
EXCEPTION, " +
+      "AnalysisException is thrown while name conflict is detected in nested 
CTE. This config " +
+      "will be removed in future versions and CORRECTED will be the only 
behavior.")
+    .stringConf
+    .transform(_.toUpperCase(Locale.ROOT))
+    .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
+    .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
 
   val LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC =
     buildConf("spark.sql.legacy.arrayExistsFollowsThreeValuedLogic")
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql 
b/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql
index 2f2606d..d8754d3 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte-legacy.sql
@@ -2,7 +2,7 @@ create temporary view t as select * from values 0, 1, 2 as 
t(id);
 create temporary view t2 as select * from values 0, 1 as t(id);
 
 -- CTE legacy substitution
-SET spark.sql.legacy.ctePrecedence.enabled=true;
+SET spark.sql.legacy.ctePrecedencePolicy=legacy;
 
 -- CTE in CTE definition
 WITH t as (
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql 
b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql
index b711bf3..8b9ce89 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql
@@ -1,2 +1,2 @@
---SET spark.sql.legacy.ctePrecedence.enabled = false
+--SET spark.sql.legacy.ctePrecedencePolicy = corrected
 --IMPORT cte.sql
diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out 
b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out
index a9709c4..a20ae55 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out
@@ -19,11 +19,11 @@ struct<>
 
 
 -- !query
-SET spark.sql.legacy.ctePrecedence.enabled=true
+SET spark.sql.legacy.ctePrecedencePolicy=legacy
 -- !query schema
 struct<key:string,value:string>
 -- !query output
-spark.sql.legacy.ctePrecedence.enabled true
+spark.sql.legacy.ctePrecedencePolicy   legacy
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out 
b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
index 1d50aa8..6486fba 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out
@@ -207,7 +207,7 @@ SELECT * FROM t2
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner 
CTE takes precedence. See more details in SPARK-28228.;
+Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner 
CTE takes precedence. If set it to LEGACY, outer CTE definitions will take 
precedence. See more details in SPARK-28228.;
 
 
 -- !query
@@ -226,7 +226,7 @@ SELECT * FROM t2
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner 
CTE takes precedence. See more details in SPARK-28228.;
+Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner 
CTE takes precedence. If set it to LEGACY, outer CTE definitions will take 
precedence. See more details in SPARK-28228.;
 
 
 -- !query
@@ -245,7 +245,7 @@ SELECT * FROM t2
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner 
CTE takes precedence. See more details in SPARK-28228.;
+Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner 
CTE takes precedence. If set it to LEGACY, outer CTE definitions will take 
precedence. See more details in SPARK-28228.;
 
 
 -- !query
@@ -299,7 +299,7 @@ SELECT (
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner 
CTE takes precedence. See more details in SPARK-28228.;
+Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner 
CTE takes precedence. If set it to LEGACY, outer CTE definitions will take 
precedence. See more details in SPARK-28228.;
 
 
 -- !query
@@ -314,7 +314,7 @@ SELECT (
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner 
CTE takes precedence. See more details in SPARK-28228.;
+Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner 
CTE takes precedence. If set it to LEGACY, outer CTE definitions will take 
precedence. See more details in SPARK-28228.;
 
 
 -- !query
@@ -330,7 +330,7 @@ SELECT (
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner 
CTE takes precedence. See more details in SPARK-28228.;
+Name t is ambiguous in nested CTE. Please set 
spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner 
CTE takes precedence. If set it to LEGACY, outer CTE definitions will take 
precedence. See more details in SPARK-28228.;
 
 
 -- !query


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to