[ https://issues.apache.org/jira/browse/SPARK-15205?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15294776#comment-15294776 ]
Kousuke Saruta commented on SPARK-15205: ---------------------------------------- In the end, this issue was resolved in 2.0 and 1.6.2 as part of resolving SPARK-15165. > Codegen can compile the same source code more than twice > -------------------------------------------------------- > > Key: SPARK-15205 > URL: https://issues.apache.org/jira/browse/SPARK-15205 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 2.0.0 > Reporter: Kousuke Saruta > > Sometimes the code generated for different queries is identical except for comments. > Here is one example. > {code} > val df = sc.parallelize(1 to 10).toDF > df.selectExpr("value + 1").show // query1 > df.selectExpr("value + 2").show // query2 > {code} > The following is one piece of the code generated when query1 above is executed. > {code} > /* 001 */ > /* 002 */ public java.lang.Object generate(Object[] references) { > /* 003 */ return new SpecificSafeProjection(references); > /* 004 */ } > /* 005 */ > /* 006 */ class SpecificSafeProjection extends > org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection { > /* 007 */ > /* 008 */ private Object[] references; > /* 009 */ private MutableRow mutableRow; > /* 010 */ private Object[] values; > /* 011 */ private org.apache.spark.sql.types.StructType schema; > /* 012 */ > /* 013 */ > /* 014 */ public SpecificSafeProjection(Object[] references) { > /* 015 */ this.references = references; > /* 016 */ mutableRow = (MutableRow) references[references.length - 1]; > /* 017 */ > /* 018 */ this.schema = (org.apache.spark.sql.types.StructType) > references[0]; > /* 019 */ } > /* 020 */ > /* 021 */ public java.lang.Object apply(java.lang.Object _i) { > /* 022 */ InternalRow i = (InternalRow) _i; > /* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else > input[0, int], StructField((value + 1),IntegerType,false)) */ > /* 024 */ values = new Object[1]; > /* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */ > /* 026 */ /* isnull(input[0, int]) */ > 
/* 027 */ /* input[0, int] */ > /* 028 */ int value3 = i.getInt(0); > /* 029 */ boolean isNull1 = false; > /* 030 */ int value1 = -1; > /* 031 */ if (!false && false) { > /* 032 */ /* null */ > /* 033 */ final int value4 = -1; > /* 034 */ isNull1 = true; > /* 035 */ value1 = value4; > /* 036 */ } else { > /* 037 */ /* input[0, int] */ > /* 038 */ int value5 = i.getInt(0); > /* 039 */ isNull1 = false; > /* 040 */ value1 = value5; > /* 041 */ } > /* 042 */ if (isNull1) { > /* 043 */ values[0] = null; > /* 044 */ } else { > /* 045 */ values[0] = value1; > /* 046 */ } > /* 047 */ > /* 048 */ final org.apache.spark.sql.Row value = new > org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, > this.schema); > /* 049 */ if (false) { > /* 050 */ mutableRow.setNullAt(0); > /* 051 */ } else { > /* 052 */ > /* 053 */ mutableRow.update(0, value); > /* 054 */ } > /* 055 */ > /* 056 */ return mutableRow; > /* 057 */ } > /* 058 */ } > /* 059 */ > {code} > On the other hand, the following code is for query2. 
> {code} > /* 001 */ > /* 002 */ public java.lang.Object generate(Object[] references) { > /* 003 */ return new SpecificSafeProjection(references); > /* 004 */ } > /* 005 */ > /* 006 */ class SpecificSafeProjection extends > org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection { > /* 007 */ > /* 008 */ private Object[] references; > /* 009 */ private MutableRow mutableRow; > /* 010 */ private Object[] values; > /* 011 */ private org.apache.spark.sql.types.StructType schema; > /* 012 */ > /* 013 */ > /* 014 */ public SpecificSafeProjection(Object[] references) { > /* 015 */ this.references = references; > /* 016 */ mutableRow = (MutableRow) references[references.length - 1]; > /* 017 */ > /* 018 */ this.schema = (org.apache.spark.sql.types.StructType) > references[0]; > /* 019 */ } > /* 020 */ > /* 021 */ public java.lang.Object apply(java.lang.Object _i) { > /* 022 */ InternalRow i = (InternalRow) _i; > /* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else > input[0, int], StructField((value + 2),IntegerType,false)) */ > /* 024 */ values = new Object[1]; > /* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */ > /* 026 */ /* isnull(input[0, int]) */ > /* 027 */ /* input[0, int] */ > /* 028 */ int value3 = i.getInt(0); > /* 029 */ boolean isNull1 = false; > /* 030 */ int value1 = -1; > /* 031 */ if (!false && false) { > /* 032 */ /* null */ > /* 033 */ final int value4 = -1; > /* 034 */ isNull1 = true; > /* 035 */ value1 = value4; > /* 036 */ } else { > /* 037 */ /* input[0, int] */ > /* 038 */ int value5 = i.getInt(0); > /* 039 */ isNull1 = false; > /* 040 */ value1 = value5; > /* 041 */ } > /* 042 */ if (isNull1) { > /* 043 */ values[0] = null; > /* 044 */ } else { > /* 045 */ values[0] = value1; > /* 046 */ } > /* 047 */ > /* 048 */ final org.apache.spark.sql.Row value = new > org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, > this.schema); > /* 049 */ if (false) { > /* 050 */ mutableRow.setNullAt(0); > 
/* 051 */ } else { > /* 052 */ > /* 053 */ mutableRow.update(0, value); > /* 054 */ } > /* 055 */ > /* 056 */ return mutableRow; > /* 057 */ } > /* 058 */ } > /* 059 */ > {code} > As you can see, the two pieces of generated code are essentially identical, but they are not > equal as String objects (their comments differ), so each one is compiled separately. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org