[ 
https://issues.apache.org/jira/browse/SPARK-15205?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15294776#comment-15294776
 ] 

Kousuke Saruta commented on SPARK-15205:
----------------------------------------

As a result, this issue was fixed in 2.0 and 1.6.2 as part of resolving SPARK-15165.

> Codegen can compile the same source code more than twice
> --------------------------------------------------------
>
>                 Key: SPARK-15205
>                 URL: https://issues.apache.org/jira/browse/SPARK-15205
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL
>    Affects Versions: 2.0.0
>            Reporter: Kousuke Saruta
>
> Sometimes we generate code that is identical except for its comments.
> Here is one example.
> {code}
> val df = sc.parallelize(1 to 10).toDF
> df.selectExpr("value + 1").show // query1
> df.selectExpr("value + 2").show // query2
> {code}
> The following is one piece of the code generated when query1 above is executed.
> {code}
> /* 001 */ 
> /* 002 */ public java.lang.Object generate(Object[] references) {
> /* 003 */   return new SpecificSafeProjection(references);
> /* 004 */ }
> /* 005 */ 
> /* 006 */ class SpecificSafeProjection extends 
> org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
> /* 007 */   
> /* 008 */   private Object[] references;
> /* 009 */   private MutableRow mutableRow;
> /* 010 */   private Object[] values;
> /* 011 */   private org.apache.spark.sql.types.StructType schema;
> /* 012 */   
> /* 013 */   
> /* 014 */   public SpecificSafeProjection(Object[] references) {
> /* 015 */     this.references = references;
> /* 016 */     mutableRow = (MutableRow) references[references.length - 1];
> /* 017 */     
> /* 018 */     this.schema = (org.apache.spark.sql.types.StructType) 
> references[0];
> /* 019 */   }
> /* 020 */   
> /* 021 */   public java.lang.Object apply(java.lang.Object _i) {
> /* 022 */     InternalRow i = (InternalRow) _i;
> /* 023 */     /* createexternalrow(if (isnull(input[0, int])) null else 
> input[0, int], StructField((value + 1),IntegerType,false)) */
> /* 024 */     values = new Object[1];
> /* 025 */     /* if (isnull(input[0, int])) null else input[0, int] */
> /* 026 */     /* isnull(input[0, int]) */
> /* 027 */     /* input[0, int] */
> /* 028 */     int value3 = i.getInt(0);
> /* 029 */     boolean isNull1 = false;
> /* 030 */     int value1 = -1;
> /* 031 */     if (!false && false) {
> /* 032 */       /* null */
> /* 033 */       final int value4 = -1;
> /* 034 */       isNull1 = true;
> /* 035 */       value1 = value4;
> /* 036 */     } else {
> /* 037 */       /* input[0, int] */
> /* 038 */       int value5 = i.getInt(0);
> /* 039 */       isNull1 = false;
> /* 040 */       value1 = value5;
> /* 041 */     }
> /* 042 */     if (isNull1) {
> /* 043 */       values[0] = null;
> /* 044 */     } else {
> /* 045 */       values[0] = value1;
> /* 046 */     }
> /* 047 */     
> /* 048 */     final org.apache.spark.sql.Row value = new 
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, 
> this.schema);
> /* 049 */     if (false) {
> /* 050 */       mutableRow.setNullAt(0);
> /* 051 */     } else {
> /* 052 */       
> /* 053 */       mutableRow.update(0, value);
> /* 054 */     }
> /* 055 */     
> /* 056 */     return mutableRow;
> /* 057 */   }
> /* 058 */ }
> /* 059 */ 
> {code}
> On the other hand, the following code is for query2.
> {code}
> /* 001 */ 
> /* 002 */ public java.lang.Object generate(Object[] references) {
> /* 003 */   return new SpecificSafeProjection(references);
> /* 004 */ }
> /* 005 */ 
> /* 006 */ class SpecificSafeProjection extends 
> org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
> /* 007 */   
> /* 008 */   private Object[] references;
> /* 009 */   private MutableRow mutableRow;
> /* 010 */   private Object[] values;
> /* 011 */   private org.apache.spark.sql.types.StructType schema;
> /* 012 */   
> /* 013 */   
> /* 014 */   public SpecificSafeProjection(Object[] references) {
> /* 015 */     this.references = references;
> /* 016 */     mutableRow = (MutableRow) references[references.length - 1];
> /* 017 */     
> /* 018 */     this.schema = (org.apache.spark.sql.types.StructType) 
> references[0];
> /* 019 */   }
> /* 020 */   
> /* 021 */   public java.lang.Object apply(java.lang.Object _i) {
> /* 022 */     InternalRow i = (InternalRow) _i;
> /* 023 */     /* createexternalrow(if (isnull(input[0, int])) null else 
> input[0, int], StructField((value + 2),IntegerType,false)) */
> /* 024 */     values = new Object[1];
> /* 025 */     /* if (isnull(input[0, int])) null else input[0, int] */
> /* 026 */     /* isnull(input[0, int]) */
> /* 027 */     /* input[0, int] */
> /* 028 */     int value3 = i.getInt(0);
> /* 029 */     boolean isNull1 = false;
> /* 030 */     int value1 = -1;
> /* 031 */     if (!false && false) {
> /* 032 */       /* null */
> /* 033 */       final int value4 = -1;
> /* 034 */       isNull1 = true;
> /* 035 */       value1 = value4;
> /* 036 */     } else {
> /* 037 */       /* input[0, int] */
> /* 038 */       int value5 = i.getInt(0);
> /* 039 */       isNull1 = false;
> /* 040 */       value1 = value5;
> /* 041 */     }
> /* 042 */     if (isNull1) {
> /* 043 */       values[0] = null;
> /* 044 */     } else {
> /* 045 */       values[0] = value1;
> /* 046 */     }
> /* 047 */     
> /* 048 */     final org.apache.spark.sql.Row value = new 
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, 
> this.schema);
> /* 049 */     if (false) {
> /* 050 */       mutableRow.setNullAt(0);
> /* 051 */     } else {
> /* 052 */       
> /* 053 */       mutableRow.update(0, value);
> /* 054 */     }
> /* 055 */     
> /* 056 */     return mutableRow;
> /* 057 */   }
> /* 058 */ }
> /* 059 */ 
> {code}
> As you can see, the two pieces of generated code are essentially identical, but
> they are not equal as String objects, so each one is compiled separately.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to