[ https://issues.apache.org/jira/browse/SPARK-10953?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Xiangrui Meng updated SPARK-10953: ---------------------------------- Summary: Benchmark declarative/codegen vs. imperative code for univariate statistics (was: Benchmark codegen vs. hand-written code for univariate statistics) > Benchmark declarative/codegen vs. imperative code for univariate statistics > --------------------------------------------------------------------------- > > Key: SPARK-10953 > URL: https://issues.apache.org/jira/browse/SPARK-10953 > Project: Spark > Issue Type: Sub-task > Components: SQL > Reporter: Xiangrui Meng > Assignee: Jihong MA > Fix For: 1.6.0 > > > I checked the generated code for a simple stddev_pop call: > {code} > val df = sqlContext.range(100) > df.select(stddev_pop(col("id"))).show() > {code} > This is the generated code for the merge part, which is very long and > complex. I'm not sure whether we can benefit from code generation for > univariate statistics. We should benchmark it against a Scala implementation. > {code} > 15/10/06 10:10:57 DEBUG GenerateMutableProjection: code for if > (isnull(input[1, DoubleType])) cast(0 as double) else input[1, DoubleType],if > (isnull(input[1, DoubleType])) input[6, DoubleType] else if (isnull(input[6, > DoubleType])) input[1, DoubleType] else (input[1, DoubleType] + input[6, > DoubleType]),if (isnull(input[3, DoubleType])) cast(0 as double) else > input[3, DoubleType],if (isnull(input[3, DoubleType])) input[8, DoubleType] > else if (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3, > DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * input[6, > DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])),if > (isnull(input[4, DoubleType])) input[9, DoubleType] else if (isnull(input[9, > DoubleType])) input[4, DoubleType] else ((input[4, DoubleType] + input[9, > DoubleType]) + ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8, > DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6, > DoubleType])) / (input[0, DoubleType] + 
input[6, DoubleType]))): > public Object generate(org.apache.spark.sql.catalyst.expressions.Expression[] > expr) { > return new SpecificMutableProjection(expr); > } > class SpecificMutableProjection extends > org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection { > private org.apache.spark.sql.catalyst.expressions.Expression[] expressions; > private org.apache.spark.sql.catalyst.expressions.MutableRow mutableRow; > public > SpecificMutableProjection(org.apache.spark.sql.catalyst.expressions.Expression[] > expr) { > expressions = expr; > mutableRow = new > org.apache.spark.sql.catalyst.expressions.GenericMutableRow(5); > } > public > org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection > target(org.apache.spark.sql.catalyst.expressions.MutableRow row) { > mutableRow = row; > return this; > } > /* Provide immutable access to the last projected row. */ > public InternalRow currentValue() { > return (InternalRow) mutableRow; > } > public Object apply(Object _i) { > InternalRow i = (InternalRow) _i; > /* if (isnull(input[1, DoubleType])) cast(0 as double) else input[1, > DoubleType] */ > /* isnull(input[1, DoubleType]) */ > /* input[1, DoubleType] */ > boolean isNull4 = i.isNullAt(1); > double primitive5 = isNull4 ? -1.0 : (i.getDouble(1)); > boolean isNull0 = false; > double primitive1 = -1.0; > if (!false && isNull4) { > /* cast(0 as double) */ > /* 0 */ > boolean isNull6 = false; > double primitive7 = -1.0; > if (!false) { > primitive7 = (double) 0; > } > isNull0 = isNull6; > primitive1 = primitive7; > } else { > /* input[1, DoubleType] */ > boolean isNull10 = i.isNullAt(1); > double primitive11 = isNull10 ? 
-1.0 : (i.getDouble(1)); > isNull0 = isNull10; > primitive1 = primitive11; > } > if (isNull0) { > mutableRow.setNullAt(0); > } else { > mutableRow.setDouble(0, primitive1); > } > /* if (isnull(input[1, DoubleType])) input[6, DoubleType] else if > (isnull(input[6, DoubleType])) input[1, DoubleType] else (input[1, > DoubleType] + input[6, DoubleType]) */ > /* isnull(input[1, DoubleType]) */ > /* input[1, DoubleType] */ > boolean isNull16 = i.isNullAt(1); > double primitive17 = isNull16 ? -1.0 : (i.getDouble(1)); > boolean isNull12 = false; > double primitive13 = -1.0; > if (!false && isNull16) { > /* input[6, DoubleType] */ > boolean isNull18 = i.isNullAt(6); > double primitive19 = isNull18 ? -1.0 : (i.getDouble(6)); > isNull12 = isNull18; > primitive13 = primitive19; > } else { > /* if (isnull(input[6, DoubleType])) input[1, DoubleType] else > (input[1, DoubleType] + input[6, DoubleType]) */ > /* isnull(input[6, DoubleType]) */ > /* input[6, DoubleType] */ > boolean isNull24 = i.isNullAt(6); > double primitive25 = isNull24 ? -1.0 : (i.getDouble(6)); > boolean isNull20 = false; > double primitive21 = -1.0; > if (!false && isNull24) { > /* input[1, DoubleType] */ > boolean isNull26 = i.isNullAt(1); > double primitive27 = isNull26 ? -1.0 : (i.getDouble(1)); > isNull20 = isNull26; > primitive21 = primitive27; > } else { > /* (input[1, DoubleType] + input[6, DoubleType]) */ > /* input[1, DoubleType] */ > boolean isNull30 = i.isNullAt(1); > double primitive31 = isNull30 ? -1.0 : (i.getDouble(1)); > boolean isNull28 = isNull30; > double primitive29 = -1.0; > if (!isNull28) { > /* input[6, DoubleType] */ > boolean isNull32 = i.isNullAt(6); > double primitive33 = isNull32 ? 
-1.0 : (i.getDouble(6)); > if (!isNull32) { > primitive29 = primitive31 + primitive33; > } else { > isNull28 = true; > } > } > isNull20 = isNull28; > primitive21 = primitive29; > } > isNull12 = isNull20; > primitive13 = primitive21; > } > if (isNull12) { > mutableRow.setNullAt(1); > } else { > mutableRow.setDouble(1, primitive13); > } > /* if (isnull(input[3, DoubleType])) cast(0 as double) else input[3, > DoubleType] */ > /* isnull(input[3, DoubleType]) */ > /* input[3, DoubleType] */ > boolean isNull38 = i.isNullAt(3); > double primitive39 = isNull38 ? -1.0 : (i.getDouble(3)); > boolean isNull34 = false; > double primitive35 = -1.0; > if (!false && isNull38) { > /* cast(0 as double) */ > /* 0 */ > boolean isNull40 = false; > double primitive41 = -1.0; > if (!false) { > primitive41 = (double) 0; > } > isNull34 = isNull40; > primitive35 = primitive41; > } else { > /* input[3, DoubleType] */ > boolean isNull44 = i.isNullAt(3); > double primitive45 = isNull44 ? -1.0 : (i.getDouble(3)); > isNull34 = isNull44; > primitive35 = primitive45; > } > if (isNull34) { > mutableRow.setNullAt(2); > } else { > mutableRow.setDouble(2, primitive35); > } > /* if (isnull(input[3, DoubleType])) input[8, DoubleType] else if > (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3, > DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * input[6, > DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */ > /* isnull(input[3, DoubleType]) */ > /* input[3, DoubleType] */ > boolean isNull50 = i.isNullAt(3); > double primitive51 = isNull50 ? -1.0 : (i.getDouble(3)); > boolean isNull46 = false; > double primitive47 = -1.0; > if (!false && isNull50) { > /* input[8, DoubleType] */ > boolean isNull52 = i.isNullAt(8); > double primitive53 = isNull52 ? 
-1.0 : (i.getDouble(8)); > isNull46 = isNull52; > primitive47 = primitive53; > } else { > /* if (isnull(input[8, DoubleType])) input[3, DoubleType] else > (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * > input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */ > /* isnull(input[8, DoubleType]) */ > /* input[8, DoubleType] */ > boolean isNull58 = i.isNullAt(8); > double primitive59 = isNull58 ? -1.0 : (i.getDouble(8)); > boolean isNull54 = false; > double primitive55 = -1.0; > if (!false && isNull58) { > /* input[3, DoubleType] */ > boolean isNull60 = i.isNullAt(3); > double primitive61 = isNull60 ? -1.0 : (i.getDouble(3)); > isNull54 = isNull60; > primitive55 = primitive61; > } else { > /* (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, > DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6, > DoubleType])) */ > /* (input[0, DoubleType] + input[6, DoubleType]) */ > /* input[0, DoubleType] */ > boolean isNull80 = i.isNullAt(0); > double primitive81 = isNull80 ? -1.0 : (i.getDouble(0)); > boolean isNull78 = isNull80; > double primitive79 = -1.0; > if (!isNull78) { > /* input[6, DoubleType] */ > boolean isNull82 = i.isNullAt(6); > double primitive83 = isNull82 ? -1.0 : (i.getDouble(6)); > if (!isNull82) { > primitive79 = primitive81 + primitive83; > } else { > isNull78 = true; > } > } > boolean isNull62 = false; > double primitive63 = -1.0; > if (isNull78 || primitive79 == 0) { > isNull62 = true; > } else { > /* ((input[3, DoubleType] * input[0, DoubleType]) + (input[8, > DoubleType] * input[6, DoubleType])) */ > /* (input[3, DoubleType] * input[0, DoubleType]) */ > /* input[3, DoubleType] */ > boolean isNull68 = i.isNullAt(3); > double primitive69 = isNull68 ? -1.0 : (i.getDouble(3)); > boolean isNull66 = isNull68; > double primitive67 = -1.0; > if (!isNull66) { > /* input[0, DoubleType] */ > boolean isNull70 = i.isNullAt(0); > double primitive71 = isNull70 ? 
-1.0 : (i.getDouble(0)); > if (!isNull70) { > primitive67 = primitive69 * primitive71; > } else { > isNull66 = true; > } > } > boolean isNull64 = isNull66; > double primitive65 = -1.0; > if (!isNull64) { > /* (input[8, DoubleType] * input[6, DoubleType]) */ > /* input[8, DoubleType] */ > boolean isNull74 = i.isNullAt(8); > double primitive75 = isNull74 ? -1.0 : (i.getDouble(8)); > boolean isNull72 = isNull74; > double primitive73 = -1.0; > if (!isNull72) { > /* input[6, DoubleType] */ > boolean isNull76 = i.isNullAt(6); > double primitive77 = isNull76 ? -1.0 : (i.getDouble(6)); > if (!isNull76) { > primitive73 = primitive75 * primitive77; > } else { > isNull72 = true; > } > } > if (!isNull72) { > primitive65 = primitive67 + primitive73; > } else { > isNull64 = true; > } > } > if (isNull64) { > isNull62 = true; > } else { > primitive63 = (double)(primitive65 / primitive79); > } > } > isNull54 = isNull62; > primitive55 = primitive63; > } > isNull46 = isNull54; > primitive47 = primitive55; > } > if (isNull46) { > mutableRow.setNullAt(3); > } else { > mutableRow.setDouble(3, primitive47); > } > /* if (isnull(input[4, DoubleType])) input[9, DoubleType] else if > (isnull(input[9, DoubleType])) input[4, DoubleType] else ((input[4, > DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] - input[2, > DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * (input[0, > DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6, > DoubleType]))) */ > /* isnull(input[4, DoubleType]) */ > /* input[4, DoubleType] */ > boolean isNull88 = i.isNullAt(4); > double primitive89 = isNull88 ? -1.0 : (i.getDouble(4)); > boolean isNull84 = false; > double primitive85 = -1.0; > if (!false && isNull88) { > /* input[9, DoubleType] */ > boolean isNull90 = i.isNullAt(9); > double primitive91 = isNull90 ? 
-1.0 : (i.getDouble(9)); > isNull84 = isNull90; > primitive85 = primitive91; > } else { > /* if (isnull(input[9, DoubleType])) input[4, DoubleType] else > ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] - > input[2, DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * > (input[0, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + > input[6, DoubleType]))) */ > /* isnull(input[9, DoubleType]) */ > /* input[9, DoubleType] */ > boolean isNull96 = i.isNullAt(9); > double primitive97 = isNull96 ? -1.0 : (i.getDouble(9)); > boolean isNull92 = false; > double primitive93 = -1.0; > if (!false && isNull96) { > /* input[4, DoubleType] */ > boolean isNull98 = i.isNullAt(4); > double primitive99 = isNull98 ? -1.0 : (i.getDouble(4)); > isNull92 = isNull98; > primitive93 = primitive99; > } else { > /* ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, > DoubleType] - input[2, DoubleType]) * (input[8, DoubleType] - input[2, > DoubleType])) * (input[0, DoubleType] * input[6, DoubleType])) / (input[0, > DoubleType] + input[6, DoubleType]))) */ > /* (input[4, DoubleType] + input[9, DoubleType]) */ > /* input[4, DoubleType] */ > boolean isNull104 = i.isNullAt(4); > double primitive105 = isNull104 ? -1.0 : (i.getDouble(4)); > boolean isNull102 = isNull104; > double primitive103 = -1.0; > if (!isNull102) { > /* input[9, DoubleType] */ > boolean isNull106 = i.isNullAt(9); > double primitive107 = isNull106 ? 
-1.0 : (i.getDouble(9)); > if (!isNull106) { > primitive103 = primitive105 + primitive107; > } else { > isNull102 = true; > } > } > boolean isNull100 = isNull102; > double primitive101 = -1.0; > if (!isNull100) { > /* ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8, > DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6, > DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */ > /* (input[0, DoubleType] + input[6, DoubleType]) */ > /* input[0, DoubleType] */ > boolean isNull134 = i.isNullAt(0); > double primitive135 = isNull134 ? -1.0 : (i.getDouble(0)); > boolean isNull132 = isNull134; > double primitive133 = -1.0; > if (!isNull132) { > /* input[6, DoubleType] */ > boolean isNull136 = i.isNullAt(6); > double primitive137 = isNull136 ? -1.0 : (i.getDouble(6)); > if (!isNull136) { > primitive133 = primitive135 + primitive137; > } else { > isNull132 = true; > } > } > boolean isNull108 = false; > double primitive109 = -1.0; > if (isNull132 || primitive133 == 0) { > isNull108 = true; > } else { > /* (((input[8, DoubleType] - input[2, DoubleType]) * (input[8, > DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6, > DoubleType])) */ > /* ((input[8, DoubleType] - input[2, DoubleType]) * (input[8, > DoubleType] - input[2, DoubleType])) */ > /* (input[8, DoubleType] - input[2, DoubleType]) */ > /* input[8, DoubleType] */ > boolean isNull116 = i.isNullAt(8); > double primitive117 = isNull116 ? -1.0 : (i.getDouble(8)); > boolean isNull114 = isNull116; > double primitive115 = -1.0; > if (!isNull114) { > /* input[2, DoubleType] */ > boolean isNull118 = i.isNullAt(2); > double primitive119 = isNull118 ? 
-1.0 : (i.getDouble(2)); > if (!isNull118) { > primitive115 = primitive117 - primitive119; > } else { > isNull114 = true; > } > } > boolean isNull112 = isNull114; > double primitive113 = -1.0; > if (!isNull112) { > /* (input[8, DoubleType] - input[2, DoubleType]) */ > /* input[8, DoubleType] */ > boolean isNull122 = i.isNullAt(8); > double primitive123 = isNull122 ? -1.0 : (i.getDouble(8)); > boolean isNull120 = isNull122; > double primitive121 = -1.0; > if (!isNull120) { > /* input[2, DoubleType] */ > boolean isNull124 = i.isNullAt(2); > double primitive125 = isNull124 ? -1.0 : (i.getDouble(2)); > if (!isNull124) { > primitive121 = primitive123 - primitive125; > } else { > isNull120 = true; > } > } > if (!isNull120) { > primitive113 = primitive115 * primitive121; > } else { > isNull112 = true; > } > } > boolean isNull110 = isNull112; > double primitive111 = -1.0; > if (!isNull110) { > /* (input[0, DoubleType] * input[6, DoubleType]) */ > /* input[0, DoubleType] */ > boolean isNull128 = i.isNullAt(0); > double primitive129 = isNull128 ? -1.0 : (i.getDouble(0)); > boolean isNull126 = isNull128; > double primitive127 = -1.0; > if (!isNull126) { > /* input[6, DoubleType] */ > boolean isNull130 = i.isNullAt(6); > double primitive131 = isNull130 ? 
-1.0 : (i.getDouble(6)); > if (!isNull130) { > primitive127 = primitive129 * primitive131; > } else { > isNull126 = true; > } > } > if (!isNull126) { > primitive111 = primitive113 * primitive127; > } else { > isNull110 = true; > } > } > if (isNull110) { > isNull108 = true; > } else { > primitive109 = (double)(primitive111 / primitive133); > } > } > if (!isNull108) { > primitive101 = primitive103 + primitive109; > } else { > isNull100 = true; > } > } > isNull92 = isNull100; > primitive93 = primitive101; > } > isNull84 = isNull92; > primitive85 = primitive93; > } > if (isNull84) { > mutableRow.setNullAt(4); > } else { > mutableRow.setDouble(4, primitive85); > } > return mutableRow; > } > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org