Repository: spark Updated Branches: refs/heads/branch-2.0 9f55951a1 -> 4e2a53ba4
[SPARK-13135] [SQL] Don't print expressions recursively in generated code ## What changes were proposed in this pull request? This PR is an up-to-date and a little bit improved version of #11019 of rxin for - (1) preventing recursive printing of expressions in generated code. Since the major function of this PR is indeed the above, he should be credited for the work he did. In addition to #11019, this PR improves the followings in code generation. - (2) Improve multiline comment indentation. - (3) Reduce the number of empty lines (mainly consecutive empty lines). - (4) Remove all space characters on empty lines. **Example** ```scala spark.range(1, 1000).select('id+1+2+3, 'id+4+5+6) ``` **Before** ``` Generated code: /* 001 */ public Object generate(Object[] references) { ... /* 005 */ /** /* 006 */ * Codegend pipeline for /* 007 */ * Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 008 */ * +- Range 1, 1, 8, 999, [id#0L] /* 009 */ */ ... /* 075 */ // PRODUCE: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 076 */ /* 077 */ // PRODUCE: Range 1, 1, 8, 999, [id#0L] /* 078 */ /* 079 */ // initialize Range ... /* 092 */ // CONSUME: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 093 */ /* 094 */ // CONSUME: WholeStageCodegen /* 095 */ /* 096 */ // (((input[0, bigint, false] + 1) + 2) + 3) /* 097 */ // ((input[0, bigint, false] + 1) + 2) /* 098 */ // (input[0, bigint, false] + 1) ... /* 107 */ // (((input[0, bigint, false] + 4) + 5) + 6) /* 108 */ // ((input[0, bigint, false] + 4) + 5) /* 109 */ // (input[0, bigint, false] + 4) ... /* 126 */ } ``` **After** ``` Generated code: /* 001 */ public Object generate(Object[] references) { ... /* 005 */ /** /* 006 */ * Codegend pipeline for /* 007 */ * Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 008 */ * +- Range 1, 1, 8, 999, [id#0L] /* 009 */ */ ... /* 075 */ // PRODUCE: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 076 */ // PRODUCE: Range 1, 1, 8, 999, [id#0L] /* 077 */ // initialize Range ... /* 090 */ // CONSUME: Project [(((id#0L + 1) + 2) + 3) AS (((id + 1) + 2) + 3)#3L,(((id#0L + 4) + 5) + 6) AS (((id + 4) + 5) + 6)#4L] /* 091 */ // CONSUME: WholeStageCodegen /* 092 */ // (((input[0, bigint, false] + 1) + 2) + 3) ... /* 101 */ // (((input[0, bigint, false] + 4) + 5) + 6) ... /* 118 */ } ``` ## How was this patch tested? Pass the Jenkins tests and see the result of the following command manually. ```scala scala> spark.range(1, 1000).select('id+1+2+3, 'id+4+5+6).queryExecution.debug.codegen() ``` Author: Dongjoon Hyun <dongjoonapache.org> Author: Reynold Xin <rxindatabricks.com> Author: Dongjoon Hyun <dongj...@apache.org> Closes #13192 from dongjoon-hyun/SPARK-13135. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e2a53ba Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e2a53ba Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e2a53ba Branch: refs/heads/branch-2.0 Commit: 4e2a53ba4f0fcd1b5aff0b6428fe8babfd9bb842 Parents: 9f55951 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Tue May 24 10:08:14 2016 -0700 Committer: Davies Liu <davies....@gmail.com> Committed: Tue May 24 10:09:22 2016 -0700 ---------------------------------------------------------------------- .../expressions/codegen/CodeFormatter.scala | 27 ++++++++++-- .../codegen/GenerateMutableProjection.scala | 3 +- .../expressions/codegen/GenerateOrdering.scala | 3 +- .../expressions/codegen/GeneratePredicate.scala | 3 +- .../codegen/GenerateSafeProjection.scala | 3 +- .../codegen/GenerateUnsafeProjection.scala | 3 +- .../codegen/GenerateUnsafeRowJoiner.scala | 2 +- .../codegen/CodeFormatterSuite.scala | 43 ++++++++++++++++++++ .../sql/execution/WholeStageCodegenExec.scala | 4 +- .../columnar/GenerateColumnAccessor.scala | 3 +- 10 files changed, 82 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index c741092..855ae64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -40,7 +40,7 @@ object CodeFormatter { var lastLine: String = "dummy" input.split('\n').foreach { l => val line = l.trim() - val skip = line == "" && (lastLine == "" || lastLine.endsWith("{")) + val skip = line == "" && (lastLine == "" || lastLine.endsWith("{") || lastLine.endsWith("*/")) if (!skip) { code.append(line) code.append("\n") @@ -49,6 +49,24 @@ object CodeFormatter { } code.result() } + + def stripOverlappingComments(codeAndComment: CodeAndComment): CodeAndComment = { + val code = new StringBuilder + val map = codeAndComment.comment + var lastLine: String = "dummy" + codeAndComment.body.split('\n').foreach { l => + val line = l.trim() + val skip = lastLine.startsWith("/*") && lastLine.endsWith("*/") && + line.startsWith("/*") && line.endsWith("*/") && + map(lastLine).substring(3).contains(map(line).substring(3)) + if (!skip) { + code.append(line) + code.append("\n") + } + lastLine = line + } + new CodeAndComment(code.result().trim(), map) + } } private class CodeFormatter { @@ -100,8 +118,11 @@ private class CodeFormatter { indentString } code.append(f"/* ${currentLine}%03d */ ") - code.append(thisLineIndent) - code.append(line) + if (line.trim().length > 0) { + code.append(thisLineIndent) + if (inCommentBlock && line.startsWith("*") || line.startsWith("*/")) code.append(" ") + code.append(line) + } code.append("\n") indentLevel = newIndentLevel indentString = " " * (indentSize * newIndentLevel) http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 1305289..0f82d2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -133,7 +133,8 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP } """ - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1c53d62..c10829d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -136,7 +136,8 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } }""" - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated Ordering by ${ordering.mkString(",")}:\n${CodeFormatter.format(code)}") CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering] http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index ef44e6b..106bb27 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -61,7 +61,8 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool } }""" - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}") val p = CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate] http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index 214dc40..b891f94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -181,7 +181,8 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] } """ - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index 102f276..5efba4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -390,7 +390,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro } """ - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index 4dc1678..4aa5ec8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -193,7 +193,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U | } |} """.stripMargin - val code = new CodeAndComment(codeBody, Map.empty) + val code = CodeFormatter.stripOverlappingComments(new CodeAndComment(codeBody, Map.empty)) logDebug(s"SpecificUnsafeRowJoiner($schema1, $schema2):\n${CodeFormatter.format(code)}") val c = CodeGenerator.compile(code) http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 6022f2d..76afc2e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -36,6 +36,22 @@ class CodeFormatterSuite extends SparkFunSuite { } } + test("removing overlapping comments") { + val code = new CodeAndComment( + """/*project_c4*/ + |/*project_c3*/ + |/*project_c2*/ + """.stripMargin, + Map( + "/*project_c4*/" -> "// (((input[0, bigint, false] + 1) + 2) + 3))", + "/*project_c3*/" -> "// ((input[0, bigint, false] + 1) + 2)", + "/*project_c2*/" -> "// (input[0, bigint, false] + 1)" + )) + + val reducedCode = CodeFormatter.stripOverlappingComments(code) + assert(reducedCode.body === "/*project_c4*/") + } + testCase("basic example") { """class A { |blahblah; @@ -147,4 +163,31 @@ class CodeFormatterSuite extends SparkFunSuite { |/* 006 */ } """.stripMargin } + + // scalastyle:off whitespace.end.of.line + testCase("reduce empty lines") { + CodeFormatter.stripExtraNewLines( + """class A { + | + | + | /*** comment1 */ + | + | class body; + | + | + | if (c) {duh;} + | else {boo;} + |}""".stripMargin) + }{ + """ + |/* 001 */ class A { + |/* 002 */ /*** comment1 */ + |/* 003 */ class body; + |/* 004 */ + |/* 005 */ if (c) {duh;} + |/* 006 */ else {boo;} + |/* 007 */ } + """.stripMargin + } + // scalastyle:on whitespace.end.of.line } http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 2a1ce73..908e22d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -333,8 +333,8 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co """.trim // try to compile, helpful for debug - val cleanedSource = - new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments()) + val cleanedSource = CodeFormatter.stripOverlappingComments( + new CodeAndComment(CodeFormatter.stripExtraNewLines(source), ctx.getPlaceHolderToComments())) logDebug(s"\n${CodeFormatter.format(cleanedSource)}") CodeGenerator.compile(cleanedSource) http://git-wip-us.apache.org/repos/asf/spark/blob/4e2a53ba/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index e0b4811..1041bab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -224,7 +224,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera } }""" - val code = new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()) + val code = CodeFormatter.stripOverlappingComments( + new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated ColumnarIterator:\n${CodeFormatter.format(code)}") CodeGenerator.compile(code).generate(Array.empty).asInstanceOf[ColumnarIterator] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org