[ 
https://issues.apache.org/jira/browse/SPARK-10953?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14961209#comment-14961209
 ] 

Xiangrui Meng commented on SPARK-10953:
---------------------------------------

That sounds good. I'm closing this for now since the conclusion is clear.

> Benchmark codegen vs. hand-written code for univariate statistics
> -----------------------------------------------------------------
>
>                 Key: SPARK-10953
>                 URL: https://issues.apache.org/jira/browse/SPARK-10953
>             Project: Spark
>          Issue Type: Sub-task
>          Components: SQL
>            Reporter: Xiangrui Meng
>            Assignee: Jihong MA
>             Fix For: 1.6.0
>
>
> I checked the generated code for a simple stddev_pop call:
> {code}
> val df = sqlContext.range(100)
> df.select(stddev_pop(col("id"))).show()
> {code}
> This is the generated code for the merge part, which is very long and 
> complex. I'm not sure whether we can benefit from code generation for 
> univariate statistics. We should benchmark it against a Scala implementation.
> {code}
> 15/10/06 10:10:57 DEBUG GenerateMutableProjection: code for if 
> (isnull(input[1, DoubleType])) cast(0 as double) else input[1, DoubleType],if 
> (isnull(input[1, DoubleType])) input[6, DoubleType] else if (isnull(input[6, 
> DoubleType])) input[1, DoubleType] else (input[1, DoubleType] + input[6, 
> DoubleType]),if (isnull(input[3, DoubleType])) cast(0 as double) else 
> input[3, DoubleType],if (isnull(input[3, DoubleType])) input[8, DoubleType] 
> else if (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3, 
> DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * input[6, 
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])),if 
> (isnull(input[4, DoubleType])) input[9, DoubleType] else if (isnull(input[9, 
> DoubleType])) input[4, DoubleType] else ((input[4, DoubleType] + input[9, 
> DoubleType]) + ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8, 
> DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6, 
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType]))):
> public Object generate(org.apache.spark.sql.catalyst.expressions.Expression[] 
> expr) {
>   return new SpecificMutableProjection(expr);
> }
> class SpecificMutableProjection extends 
> org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection {
>   private org.apache.spark.sql.catalyst.expressions.Expression[] expressions;
>   private org.apache.spark.sql.catalyst.expressions.MutableRow mutableRow;
>   public 
> SpecificMutableProjection(org.apache.spark.sql.catalyst.expressions.Expression[]
>  expr) {
>     expressions = expr;
>     mutableRow = new 
> org.apache.spark.sql.catalyst.expressions.GenericMutableRow(5);
>   }
>   public 
> org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection 
> target(org.apache.spark.sql.catalyst.expressions.MutableRow row) {
>     mutableRow = row;
>     return this;
>   }
>   /* Provide immutable access to the last projected row. */
>   public InternalRow currentValue() {
>     return (InternalRow) mutableRow;
>   }
>   public Object apply(Object _i) {
>     InternalRow i = (InternalRow) _i;
>     /* if (isnull(input[1, DoubleType])) cast(0 as double) else input[1, 
> DoubleType] */
>     /* isnull(input[1, DoubleType]) */
>     /* input[1, DoubleType] */
>     boolean isNull4 = i.isNullAt(1);
>     double primitive5 = isNull4 ? -1.0 : (i.getDouble(1));
>     boolean isNull0 = false;
>     double primitive1 = -1.0;
>     if (!false && isNull4) {
>       /* cast(0 as double) */
>       /* 0 */
>       boolean isNull6 = false;
>       double primitive7 = -1.0;
>       if (!false) {
>         primitive7 = (double) 0;
>       }
>       isNull0 = isNull6;
>       primitive1 = primitive7;
>     } else {
>       /* input[1, DoubleType] */
>       boolean isNull10 = i.isNullAt(1);
>       double primitive11 = isNull10 ? -1.0 : (i.getDouble(1));
>       isNull0 = isNull10;
>       primitive1 = primitive11;
>     }
>     if (isNull0) {
>       mutableRow.setNullAt(0);
>     } else {
>       mutableRow.setDouble(0, primitive1);
>     }
>     /* if (isnull(input[1, DoubleType])) input[6, DoubleType] else if 
> (isnull(input[6, DoubleType])) input[1, DoubleType] else (input[1, 
> DoubleType] + input[6, DoubleType]) */
>     /* isnull(input[1, DoubleType]) */
>     /* input[1, DoubleType] */
>     boolean isNull16 = i.isNullAt(1);
>     double primitive17 = isNull16 ? -1.0 : (i.getDouble(1));
>     boolean isNull12 = false;
>     double primitive13 = -1.0;
>     if (!false && isNull16) {
>       /* input[6, DoubleType] */
>       boolean isNull18 = i.isNullAt(6);
>       double primitive19 = isNull18 ? -1.0 : (i.getDouble(6));
>       isNull12 = isNull18;
>       primitive13 = primitive19;
>     } else {
>       /* if (isnull(input[6, DoubleType])) input[1, DoubleType] else 
> (input[1, DoubleType] + input[6, DoubleType]) */
>       /* isnull(input[6, DoubleType]) */
>       /* input[6, DoubleType] */
>       boolean isNull24 = i.isNullAt(6);
>       double primitive25 = isNull24 ? -1.0 : (i.getDouble(6));
>       boolean isNull20 = false;
>       double primitive21 = -1.0;
>       if (!false && isNull24) {
>         /* input[1, DoubleType] */
>         boolean isNull26 = i.isNullAt(1);
>         double primitive27 = isNull26 ? -1.0 : (i.getDouble(1));
>         isNull20 = isNull26;
>         primitive21 = primitive27;
>       } else {
>         /* (input[1, DoubleType] + input[6, DoubleType]) */
>         /* input[1, DoubleType] */
>         boolean isNull30 = i.isNullAt(1);
>         double primitive31 = isNull30 ? -1.0 : (i.getDouble(1));
>         boolean isNull28 = isNull30;
>         double primitive29 = -1.0;
>         if (!isNull28) {
>           /* input[6, DoubleType] */
>           boolean isNull32 = i.isNullAt(6);
>           double primitive33 = isNull32 ? -1.0 : (i.getDouble(6));
>           if (!isNull32) {
>             primitive29 = primitive31 + primitive33;
>           } else {
>             isNull28 = true;
>           }
>         }
>         isNull20 = isNull28;
>         primitive21 = primitive29;
>       }
>       isNull12 = isNull20;
>       primitive13 = primitive21;
>     }
>     if (isNull12) {
>       mutableRow.setNullAt(1);
>     } else {
>       mutableRow.setDouble(1, primitive13);
>     }
>     /* if (isnull(input[3, DoubleType])) cast(0 as double) else input[3, 
> DoubleType] */
>     /* isnull(input[3, DoubleType]) */
>     /* input[3, DoubleType] */
>     boolean isNull38 = i.isNullAt(3);
>     double primitive39 = isNull38 ? -1.0 : (i.getDouble(3));
>     boolean isNull34 = false;
>     double primitive35 = -1.0;
>     if (!false && isNull38) {
>       /* cast(0 as double) */
>       /* 0 */
>       boolean isNull40 = false;
>       double primitive41 = -1.0;
>       if (!false) {
>         primitive41 = (double) 0;
>       }
>       isNull34 = isNull40;
>       primitive35 = primitive41;
>     } else {
>       /* input[3, DoubleType] */
>       boolean isNull44 = i.isNullAt(3);
>       double primitive45 = isNull44 ? -1.0 : (i.getDouble(3));
>       isNull34 = isNull44;
>       primitive35 = primitive45;
>     }
>     if (isNull34) {
>       mutableRow.setNullAt(2);
>     } else {
>       mutableRow.setDouble(2, primitive35);
>     }
>     /* if (isnull(input[3, DoubleType])) input[8, DoubleType] else if 
> (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3, 
> DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * input[6, 
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
>     /* isnull(input[3, DoubleType]) */
>     /* input[3, DoubleType] */
>     boolean isNull50 = i.isNullAt(3);
>     double primitive51 = isNull50 ? -1.0 : (i.getDouble(3));
>     boolean isNull46 = false;
>     double primitive47 = -1.0;
>     if (!false && isNull50) {
>       /* input[8, DoubleType] */
>       boolean isNull52 = i.isNullAt(8);
>       double primitive53 = isNull52 ? -1.0 : (i.getDouble(8));
>       isNull46 = isNull52;
>       primitive47 = primitive53;
>     } else {
>       /* if (isnull(input[8, DoubleType])) input[3, DoubleType] else 
> (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * 
> input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
>       /* isnull(input[8, DoubleType]) */
>       /* input[8, DoubleType] */
>       boolean isNull58 = i.isNullAt(8);
>       double primitive59 = isNull58 ? -1.0 : (i.getDouble(8));
>       boolean isNull54 = false;
>       double primitive55 = -1.0;
>       if (!false && isNull58) {
>         /* input[3, DoubleType] */
>         boolean isNull60 = i.isNullAt(3);
>         double primitive61 = isNull60 ? -1.0 : (i.getDouble(3));
>         isNull54 = isNull60;
>         primitive55 = primitive61;
>       } else {
>         /* (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, 
> DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6, 
> DoubleType])) */
>         /* (input[0, DoubleType] + input[6, DoubleType]) */
>         /* input[0, DoubleType] */
>         boolean isNull80 = i.isNullAt(0);
>         double primitive81 = isNull80 ? -1.0 : (i.getDouble(0));
>         boolean isNull78 = isNull80;
>         double primitive79 = -1.0;
>         if (!isNull78) {
>           /* input[6, DoubleType] */
>           boolean isNull82 = i.isNullAt(6);
>           double primitive83 = isNull82 ? -1.0 : (i.getDouble(6));
>           if (!isNull82) {
>             primitive79 = primitive81 + primitive83;
>           } else {
>             isNull78 = true;
>           }
>         }
>         boolean isNull62 = false;
>         double primitive63 = -1.0;
>         if (isNull78 || primitive79 == 0) {
>           isNull62 = true;
>         } else {
>           /* ((input[3, DoubleType] * input[0, DoubleType]) + (input[8, 
> DoubleType] * input[6, DoubleType])) */
>           /* (input[3, DoubleType] * input[0, DoubleType]) */
>           /* input[3, DoubleType] */
>           boolean isNull68 = i.isNullAt(3);
>           double primitive69 = isNull68 ? -1.0 : (i.getDouble(3));
>           boolean isNull66 = isNull68;
>           double primitive67 = -1.0;
>           if (!isNull66) {
>             /* input[0, DoubleType] */
>             boolean isNull70 = i.isNullAt(0);
>             double primitive71 = isNull70 ? -1.0 : (i.getDouble(0));
>             if (!isNull70) {
>               primitive67 = primitive69 * primitive71;
>             } else {
>               isNull66 = true;
>             }
>           }
>           boolean isNull64 = isNull66;
>           double primitive65 = -1.0;
>           if (!isNull64) {
>             /* (input[8, DoubleType] * input[6, DoubleType]) */
>             /* input[8, DoubleType] */
>             boolean isNull74 = i.isNullAt(8);
>             double primitive75 = isNull74 ? -1.0 : (i.getDouble(8));
>             boolean isNull72 = isNull74;
>             double primitive73 = -1.0;
>             if (!isNull72) {
>               /* input[6, DoubleType] */
>               boolean isNull76 = i.isNullAt(6);
>               double primitive77 = isNull76 ? -1.0 : (i.getDouble(6));
>               if (!isNull76) {
>                 primitive73 = primitive75 * primitive77;
>               } else {
>                 isNull72 = true;
>               }
>             }
>             if (!isNull72) {
>               primitive65 = primitive67 + primitive73;
>             } else {
>               isNull64 = true;
>             }
>           }
>           if (isNull64) {
>             isNull62 = true;
>           } else {
>             primitive63 = (double)(primitive65 / primitive79);
>           }
>         }
>         isNull54 = isNull62;
>         primitive55 = primitive63;
>       }
>       isNull46 = isNull54;
>       primitive47 = primitive55;
>     }
>     if (isNull46) {
>       mutableRow.setNullAt(3);
>     } else {
>       mutableRow.setDouble(3, primitive47);
>     }
>     /* if (isnull(input[4, DoubleType])) input[9, DoubleType] else if 
> (isnull(input[9, DoubleType])) input[4, DoubleType] else ((input[4, 
> DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] - input[2, 
> DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * (input[0, 
> DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6, 
> DoubleType]))) */
>     /* isnull(input[4, DoubleType]) */
>     /* input[4, DoubleType] */
>     boolean isNull88 = i.isNullAt(4);
>     double primitive89 = isNull88 ? -1.0 : (i.getDouble(4));
>     boolean isNull84 = false;
>     double primitive85 = -1.0;
>     if (!false && isNull88) {
>       /* input[9, DoubleType] */
>       boolean isNull90 = i.isNullAt(9);
>       double primitive91 = isNull90 ? -1.0 : (i.getDouble(9));
>       isNull84 = isNull90;
>       primitive85 = primitive91;
>     } else {
>       /* if (isnull(input[9, DoubleType])) input[4, DoubleType] else 
> ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] - 
> input[2, DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * 
> (input[0, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + 
> input[6, DoubleType]))) */
>       /* isnull(input[9, DoubleType]) */
>       /* input[9, DoubleType] */
>       boolean isNull96 = i.isNullAt(9);
>       double primitive97 = isNull96 ? -1.0 : (i.getDouble(9));
>       boolean isNull92 = false;
>       double primitive93 = -1.0;
>       if (!false && isNull96) {
>         /* input[4, DoubleType] */
>         boolean isNull98 = i.isNullAt(4);
>         double primitive99 = isNull98 ? -1.0 : (i.getDouble(4));
>         isNull92 = isNull98;
>         primitive93 = primitive99;
>       } else {
>         /* ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, 
> DoubleType] - input[2, DoubleType]) * (input[8, DoubleType] - input[2, 
> DoubleType])) * (input[0, DoubleType] * input[6, DoubleType])) / (input[0, 
> DoubleType] + input[6, DoubleType]))) */
>         /* (input[4, DoubleType] + input[9, DoubleType]) */
>         /* input[4, DoubleType] */
>         boolean isNull104 = i.isNullAt(4);
>         double primitive105 = isNull104 ? -1.0 : (i.getDouble(4));
>         boolean isNull102 = isNull104;
>         double primitive103 = -1.0;
>         if (!isNull102) {
>           /* input[9, DoubleType] */
>           boolean isNull106 = i.isNullAt(9);
>           double primitive107 = isNull106 ? -1.0 : (i.getDouble(9));
>           if (!isNull106) {
>             primitive103 = primitive105 + primitive107;
>           } else {
>             isNull102 = true;
>           }
>         }
>         boolean isNull100 = isNull102;
>         double primitive101 = -1.0;
>         if (!isNull100) {
>           /* ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8, 
> DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6, 
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
>           /* (input[0, DoubleType] + input[6, DoubleType]) */
>           /* input[0, DoubleType] */
>           boolean isNull134 = i.isNullAt(0);
>           double primitive135 = isNull134 ? -1.0 : (i.getDouble(0));
>           boolean isNull132 = isNull134;
>           double primitive133 = -1.0;
>           if (!isNull132) {
>             /* input[6, DoubleType] */
>             boolean isNull136 = i.isNullAt(6);
>             double primitive137 = isNull136 ? -1.0 : (i.getDouble(6));
>             if (!isNull136) {
>               primitive133 = primitive135 + primitive137;
>             } else {
>               isNull132 = true;
>             }
>           }
>           boolean isNull108 = false;
>           double primitive109 = -1.0;
>           if (isNull132 || primitive133 == 0) {
>             isNull108 = true;
>           } else {
>             /* (((input[8, DoubleType] - input[2, DoubleType]) * (input[8, 
> DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6, 
> DoubleType])) */
>             /* ((input[8, DoubleType] - input[2, DoubleType]) * (input[8, 
> DoubleType] - input[2, DoubleType])) */
>             /* (input[8, DoubleType] - input[2, DoubleType]) */
>             /* input[8, DoubleType] */
>             boolean isNull116 = i.isNullAt(8);
>             double primitive117 = isNull116 ? -1.0 : (i.getDouble(8));
>             boolean isNull114 = isNull116;
>             double primitive115 = -1.0;
>             if (!isNull114) {
>               /* input[2, DoubleType] */
>               boolean isNull118 = i.isNullAt(2);
>               double primitive119 = isNull118 ? -1.0 : (i.getDouble(2));
>               if (!isNull118) {
>                 primitive115 = primitive117 - primitive119;
>               } else {
>                 isNull114 = true;
>               }
>             }
>             boolean isNull112 = isNull114;
>             double primitive113 = -1.0;
>             if (!isNull112) {
>               /* (input[8, DoubleType] - input[2, DoubleType]) */
>               /* input[8, DoubleType] */
>               boolean isNull122 = i.isNullAt(8);
>               double primitive123 = isNull122 ? -1.0 : (i.getDouble(8));
>               boolean isNull120 = isNull122;
>               double primitive121 = -1.0;
>               if (!isNull120) {
>                 /* input[2, DoubleType] */
>                 boolean isNull124 = i.isNullAt(2);
>                 double primitive125 = isNull124 ? -1.0 : (i.getDouble(2));
>                 if (!isNull124) {
>                   primitive121 = primitive123 - primitive125;
>                 } else {
>                   isNull120 = true;
>                 }
>               }
>               if (!isNull120) {
>                 primitive113 = primitive115 * primitive121;
>               } else {
>                 isNull112 = true;
>               }
>             }
>             boolean isNull110 = isNull112;
>             double primitive111 = -1.0;
>             if (!isNull110) {
>               /* (input[0, DoubleType] * input[6, DoubleType]) */
>               /* input[0, DoubleType] */
>               boolean isNull128 = i.isNullAt(0);
>               double primitive129 = isNull128 ? -1.0 : (i.getDouble(0));
>               boolean isNull126 = isNull128;
>               double primitive127 = -1.0;
>               if (!isNull126) {
>                 /* input[6, DoubleType] */
>                 boolean isNull130 = i.isNullAt(6);
>                 double primitive131 = isNull130 ? -1.0 : (i.getDouble(6));
>                 if (!isNull130) {
>                   primitive127 = primitive129 * primitive131;
>                 } else {
>                   isNull126 = true;
>                 }
>               }
>               if (!isNull126) {
>                 primitive111 = primitive113 * primitive127;
>               } else {
>                 isNull110 = true;
>               }
>             }
>             if (isNull110) {
>               isNull108 = true;
>             } else {
>               primitive109 = (double)(primitive111 / primitive133);
>             }
>           }
>           if (!isNull108) {
>             primitive101 = primitive103 + primitive109;
>           } else {
>             isNull100 = true;
>           }
>         }
>         isNull92 = isNull100;
>         primitive93 = primitive101;
>       }
>       isNull84 = isNull92;
>       primitive85 = primitive93;
>     }
>     if (isNull84) {
>       mutableRow.setNullAt(4);
>     } else {
>       mutableRow.setDouble(4, primitive85);
>     }
>     return mutableRow;
>   }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to