Github user carsonwang commented on a diff in the pull request: https://github.com/apache/spark/pull/17540#discussion_r114238664 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala --- @@ -73,21 +99,35 @@ object SQLExecution { } r } else { - // Don't support nested `withNewExecutionId`. This is an example of the nested - // `withNewExecutionId`: + // Nesting `withNewExecutionId` may be incorrect; log a warning. + // + // This is an example of the nested `withNewExecutionId`: // // class DataFrame { + // // Note: `collect` will call withNewExecutionId // def foo: T = withNewExecutionId { something.createNewDataFrame().collect() } // } // - // Note: `collect` will call withNewExecutionId // In this case, only the "executedPlan" for "collect" will be executed. The "executedPlan" - // for the outer DataFrame won't be executed. So it's meaningless to create a new Execution - // for the outer DataFrame. Even if we track it, since its "executedPlan" doesn't run, + // for the outer Dataset won't be executed. So it's meaningless to create a new Execution + // for the outer Dataset. Even if we track it, since its "executedPlan" doesn't run, // all accumulator metrics will be 0. It will confuse people if we show them in Web UI. // - // A real case is the `DataFrame.count` method. - throw new IllegalArgumentException(s"$EXECUTION_ID_KEY is already set") + // Some operations will start nested executions. For example, CacheTableCommand will uses + // Dataset#count to materialize cached records when caching is not lazy. Because there are + // legitimate reasons to nest executions in withNewExecutionId, this logs a warning but does + // not throw an exception to avoid failing at runtime. Exceptions will be thrown for tests + // to ensure that nesting is avoided. + // + // To avoid this warning, use nested { ... } + if (!Option(sc.getLocalProperty(ALLOW_NESTED_EXECUTION)).exists(_.toBoolean)) { --- End diff -- just ignore this.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org