[ https://issues.apache.org/jira/browse/SPARK-18147?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Wenchen Fan updated SPARK-18147:
--------------------------------
    Assignee: Liang-Chi Hsieh

> Broken Spark SQL Codegen
> ------------------------
>
>                 Key: SPARK-18147
>                 URL: https://issues.apache.org/jira/browse/SPARK-18147
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.1
>            Reporter: koert kuipers
>            Assignee: Liang-Chi Hsieh
>            Priority: Critical
>             Fix For: 2.1.0
>
>
> This is me deliberately trying to break Spark SQL codegen to uncover potential
> issues, by creating arbitrarily complex data structures using primitives,
> strings, basic collections (Map, Seq, Option), tuples, and case classes.
> First example: nested case classes.
> Code:
> {noformat}
> import scala.reflect.runtime.universe.TypeTag
> import org.apache.spark.sql.{Encoder, Row}
> import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
> import org.apache.spark.sql.expressions.Aggregator
> import spark.implicits._  // assumes a SparkSession named spark (e.g. in spark-shell)
>
> class ComplexResultAgg[B: TypeTag, C: TypeTag](val zero: B, result: C)
>     extends Aggregator[Row, B, C] {
>   override def reduce(b: B, input: Row): B = b
>   override def merge(b1: B, b2: B): B = b1
>   override def finish(reduction: B): C = result
>   override def bufferEncoder: Encoder[B] = ExpressionEncoder[B]()
>   override def outputEncoder: Encoder[C] = ExpressionEncoder[C]()
> }
>
> case class Struct2(d: Double = 0.0, s1: Seq[Double] = Seq.empty, s2: Seq[Long] = Seq.empty)
> case class Struct3(a: Struct2 = Struct2(), b: Struct2 = Struct2())
>
> val df1 = Seq(("a", "aa"), ("a", "aa"), ("b", "b"), ("b", null)).toDF("x", "y").groupBy("x").agg(
>   new ComplexResultAgg("boo", Struct3()).toColumn
> )
> df1.printSchema
> df1.show
> {noformat}
> The result is:
> {noformat}
> [info]   Cause: java.util.concurrent.ExecutionException: java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 33, Column 12: Expression "isNull1" is not an rvalue
> [info] /* 001 */ public java.lang.Object generate(Object[] references) {
> [info] /* 002 */   return new SpecificMutableProjection(references);
> [info] /* 003 */ }
> [info] /* 004 */
> [info] /* 005 */ class SpecificMutableProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection {
> [info] /* 006 */
> [info] /* 007 */   private Object[] references;
> [info] /* 008 */   private MutableRow mutableRow;
> [info] /* 009 */   private Object[] values;
> [info] /* 010 */   private java.lang.String errMsg;
> [info] /* 011 */   private Object[] values1;
> [info] /* 012 */   private java.lang.String errMsg1;
> [info] /* 013 */   private boolean[] argIsNulls;
> [info] /* 014 */   private scala.collection.Seq argValue;
> [info] /* 015 */   private java.lang.String errMsg2;
> [info] /* 016 */   private boolean[] argIsNulls1;
> [info] /* 017 */   private scala.collection.Seq argValue1;
> [info] /* 018 */   private java.lang.String errMsg3;
> [info] /* 019 */   private java.lang.String errMsg4;
> [info] /* 020 */   private Object[] values2;
> [info] /* 021 */   private java.lang.String errMsg5;
> [info] /* 022 */   private boolean[] argIsNulls2;
> [info] /* 023 */   private scala.collection.Seq argValue2;
> [info] /* 024 */   private java.lang.String errMsg6;
> [info] /* 025 */   private boolean[] argIsNulls3;
> [info] /* 026 */   private scala.collection.Seq argValue3;
> [info] /* 027 */   private java.lang.String errMsg7;
> [info] /* 028 */   private boolean isNull_0;
> [info] /* 029 */   private InternalRow value_0;
> [info] /* 030 */
> [info] /* 031 */   private void apply_1(InternalRow i) {
> [info] /* 032 */
> [info] /* 033 */     if (isNull1) {
> [info] /* 034 */       throw new RuntimeException(errMsg3);
> [info] /* 035 */     }
> [info] /* 036 */
> [info] /* 037 */     boolean isNull24 = false;
> [info] /* 038 */     final com.tresata.spark.sql.Struct2 value24 = isNull24 ? null : (com.tresata.spark.sql.Struct2) value1.a();
> [info] /* 039 */     isNull24 = value24 == null;
> [info] /* 040 */
> [info] /* 041 */     boolean isNull23 = isNull24;
> [info] /* 042 */     final scala.collection.Seq value23 = isNull23 ? null : (scala.collection.Seq) value24.s2();
> [info] /* 043 */     isNull23 = value23 == null;
> [info] /* 044 */     argIsNulls1[0] = isNull23;
> [info] /* 045 */     argValue1 = value23;
> [info] /* 046 */
> [info] /* 047 */
> [info] /* 048 */
> [info] /* 049 */     boolean isNull22 = false;
> [info] /* 050 */     for (int idx = 0; idx < 1; idx++) {
> [info] /* 051 */       if (argIsNulls1[idx]) { isNull22 = true; break; }
> [info] /* 052 */     }
> [info] /* 053 */
> [info] /* 054 */     final ArrayData value22 = isNull22 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue1);
> [info] /* 055 */     if (isNull22) {
> [info] /* 056 */       values1[2] = null;
> [info] /* 057 */     } else {
> [info] /* 058 */       values1[2] = value22;
> [info] /* 059 */     }
> [info] /* 060 */   }
> [info] /* 061 */
> [info] /* 062 */
> [info] /* 063 */   private void apply1_1(InternalRow i) {
> [info] /* 064 */
> [info] /* 065 */     if (isNull1) {
> [info] /* 066 */       throw new RuntimeException(errMsg7);
> [info] /* 067 */     }
> [info] /* 068 */
> [info] /* 069 */     boolean isNull41 = false;
> [info] /* 070 */     final com.tresata.spark.sql.Struct2 value41 = isNull41 ? null : (com.tresata.spark.sql.Struct2) value1.b();
> [info] /* 071 */     isNull41 = value41 == null;
> [info] /* 072 */
> [info] /* 073 */     boolean isNull40 = isNull41;
> [info] /* 074 */     final scala.collection.Seq value40 = isNull40 ? null : (scala.collection.Seq) value41.s2();
> [info] /* 075 */     isNull40 = value40 == null;
> [info] /* 076 */     argIsNulls3[0] = isNull40;
> [info] /* 077 */     argValue3 = value40;
> [info] /* 078 */
> [info] /* 079 */
> [info] /* 080 */
> [info] /* 081 */     boolean isNull39 = false;
> [info] /* 082 */     for (int idx = 0; idx < 1; idx++) {
> [info] /* 083 */       if (argIsNulls3[idx]) { isNull39 = true; break; }
> [info] /* 084 */     }
> [info] /* 085 */
> [info] /* 086 */     final ArrayData value39 = isNull39 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue3);
> [info] /* 087 */     if (isNull39) {
> [info] /* 088 */       values2[2] = null;
> [info] /* 089 */     } else {
> [info] /* 090 */       values2[2] = value39;
> [info] /* 091 */     }
> [info] /* 092 */   }
> [info] /* 093 */
> [info] /* 094 */
> [info] /* 095 */   private void apply_0(InternalRow i) {
> [info] /* 096 */
> [info] /* 097 */     if (isNull1) {
> [info] /* 098 */       throw new RuntimeException(errMsg1);
> [info] /* 099 */     }
> [info] /* 100 */
> [info] /* 101 */     boolean isNull16 = false;
> [info] /* 102 */     final com.tresata.spark.sql.Struct2 value16 = isNull16 ? null : (com.tresata.spark.sql.Struct2) value1.a();
> [info] /* 103 */     isNull16 = value16 == null;
> [info] /* 104 */
> [info] /* 105 */     boolean isNull15 = isNull16;
> [info] /* 106 */     final double value15 = isNull15 ? -1.0 : value16.d();
> [info] /* 107 */     if (isNull15) {
> [info] /* 108 */       values1[0] = null;
> [info] /* 109 */     } else {
> [info] /* 110 */       values1[0] = value15;
> [info] /* 111 */     }
> [info] /* 112 */     if (isNull1) {
> [info] /* 113 */       throw new RuntimeException(errMsg2);
> [info] /* 114 */     }
> [info] /* 115 */
> [info] /* 116 */     boolean isNull20 = false;
> [info] /* 117 */     final com.tresata.spark.sql.Struct2 value20 = isNull20 ? null : (com.tresata.spark.sql.Struct2) value1.a();
> [info] /* 118 */     isNull20 = value20 == null;
> [info] /* 119 */
> [info] /* 120 */     boolean isNull19 = isNull20;
> [info] /* 121 */     final scala.collection.Seq value19 = isNull19 ? null : (scala.collection.Seq) value20.s1();
> [info] /* 122 */     isNull19 = value19 == null;
> [info] /* 123 */     argIsNulls[0] = isNull19;
> [info] /* 124 */     argValue = value19;
> [info] /* 125 */
> [info] /* 126 */
> [info] /* 127 */
> [info] /* 128 */     boolean isNull18 = false;
> [info] /* 129 */     for (int idx = 0; idx < 1; idx++) {
> [info] /* 130 */       if (argIsNulls[idx]) { isNull18 = true; break; }
> [info] /* 131 */     }
> [info] /* 132 */
> [info] /* 133 */     final ArrayData value18 = isNull18 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue);
> [info] /* 134 */     if (isNull18) {
> [info] /* 135 */       values1[1] = null;
> [info] /* 136 */     } else {
> [info] /* 137 */       values1[1] = value18;
> [info] /* 138 */     }
> [info] /* 139 */   }
> [info] /* 140 */
> [info] /* 141 */
> [info] /* 142 */   private void apply1_0(InternalRow i) {
> [info] /* 143 */
> [info] /* 144 */     if (isNull1) {
> [info] /* 145 */       throw new RuntimeException(errMsg5);
> [info] /* 146 */     }
> [info] /* 147 */
> [info] /* 148 */     boolean isNull33 = false;
> [info] /* 149 */     final com.tresata.spark.sql.Struct2 value33 = isNull33 ? null : (com.tresata.spark.sql.Struct2) value1.b();
> [info] /* 150 */     isNull33 = value33 == null;
> [info] /* 151 */
> [info] /* 152 */     boolean isNull32 = isNull33;
> [info] /* 153 */     final double value32 = isNull32 ? -1.0 : value33.d();
> [info] /* 154 */     if (isNull32) {
> [info] /* 155 */       values2[0] = null;
> [info] /* 156 */     } else {
> [info] /* 157 */       values2[0] = value32;
> [info] /* 158 */     }
> [info] /* 159 */     if (isNull1) {
> [info] /* 160 */       throw new RuntimeException(errMsg6);
> [info] /* 161 */     }
> [info] /* 162 */
> [info] /* 163 */     boolean isNull37 = false;
> [info] /* 164 */     final com.tresata.spark.sql.Struct2 value37 = isNull37 ? null : (com.tresata.spark.sql.Struct2) value1.b();
> [info] /* 165 */     isNull37 = value37 == null;
> [info] /* 166 */
> [info] /* 167 */     boolean isNull36 = isNull37;
> [info] /* 168 */     final scala.collection.Seq value36 = isNull36 ? null : (scala.collection.Seq) value37.s1();
> [info] /* 169 */     isNull36 = value36 == null;
> [info] /* 170 */     argIsNulls2[0] = isNull36;
> [info] /* 171 */     argValue2 = value36;
> [info] /* 172 */
> [info] /* 173 */
> [info] /* 174 */
> [info] /* 175 */     boolean isNull35 = false;
> [info] /* 176 */     for (int idx = 0; idx < 1; idx++) {
> [info] /* 177 */       if (argIsNulls2[idx]) { isNull35 = true; break; }
> [info] /* 178 */     }
> [info] /* 179 */
> [info] /* 180 */     final ArrayData value35 = isNull35 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue2);
> [info] /* 181 */     if (isNull35) {
> [info] /* 182 */       values2[1] = null;
> [info] /* 183 */     } else {
> [info] /* 184 */       values2[1] = value35;
> [info] /* 185 */     }
> [info] /* 186 */   }
> [info] /* 187 */
> [info] /* 188 */
> [info] /* 189 */   public SpecificMutableProjection(Object[] references) {
> [info] /* 190 */     this.references = references;
> [info] /* 191 */     mutableRow = new org.apache.spark.sql.catalyst.expressions.GenericMutableRow(1);
> [info] /* 192 */     this.values = null;
> [info] /* 193 */     this.errMsg = (java.lang.String) references[1];
> [info] /* 194 */     this.values1 = null;
> [info] /* 195 */     this.errMsg1 = (java.lang.String) references[2];
> [info] /* 196 */     argIsNulls = new boolean[1];
> [info] /* 197 */
> [info] /* 198 */     this.errMsg2 = (java.lang.String) references[3];
> [info] /* 199 */     argIsNulls1 = new boolean[1];
> [info] /* 200 */
> [info] /* 201 */     this.errMsg3 = (java.lang.String) references[4];
> [info] /* 202 */     this.errMsg4 = (java.lang.String) references[5];
> [info] /* 203 */     this.values2 = null;
> [info] /* 204 */     this.errMsg5 = (java.lang.String) references[6];
> [info] /* 205 */     argIsNulls2 = new boolean[1];
> [info] /* 206 */
> [info] /* 207 */     this.errMsg6 = (java.lang.String) references[7];
> [info] /* 208 */     argIsNulls3 = new boolean[1];
> [info] /* 209 */
> [info] /* 210 */     this.errMsg7 = (java.lang.String) references[8];
> [info] /* 211 */     this.isNull_0 = true;
> [info] /* 212 */     this.value_0 = null;
> [info] /* 213 */   }
> [info] /* 214 */
> [info] /* 215 */   public org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection target(MutableRow row) {
> [info] /* 216 */     mutableRow = row;
> [info] /* 217 */     return this;
> [info] /* 218 */   }
> [info] /* 219 */
> [info] /* 220 */   /* Provide immutable access to the last projected row. */
> [info] /* 221 */   public InternalRow currentValue() {
> [info] /* 222 */     return (InternalRow) mutableRow;
> [info] /* 223 */   }
> [info] /* 224 */
> [info] /* 225 */   public java.lang.Object apply(java.lang.Object _i) {
> [info] /* 226 */     InternalRow i = (InternalRow) _i;
> [info] /* 227 */
> [info] /* 228 */
> [info] /* 229 */
> [info] /* 230 */     Object obj = ((Expression) references[0]).eval(null);
> [info] /* 231 */     org.apache.spark.sql.expressions.Aggregator value2 = (org.apache.spark.sql.expressions.Aggregator) obj;
> [info] /* 232 */
> [info] /* 233 */     boolean isNull4 = i.isNullAt(0);
> [info] /* 234 */     UTF8String value4 = isNull4 ? null : (i.getUTF8String(0));
> [info] /* 235 */
> [info] /* 236 */     boolean isNull3 = isNull4;
> [info] /* 237 */     final java.lang.String value3 = isNull3 ? null : (java.lang.String) value4.toString();
> [info] /* 238 */     isNull3 = value3 == null;
> [info] /* 239 */     boolean isNull1 = false || isNull3;
> [info] /* 240 */     final com.tresata.spark.sql.Struct3 value1 = isNull1 ? null : (com.tresata.spark.sql.Struct3) value2.finish(value3);
> [info] /* 241 */     isNull1 = value1 == null;
> [info] /* 242 */
> [info] /* 243 */     boolean isNull5 = false;
> [info] /* 244 */     InternalRow value5 = null;
> [info] /* 245 */     if (!false && isNull1) {
> [info] /* 246 */
> [info] /* 247 */       final InternalRow value7 = null;
> [info] /* 248 */       isNull5 = true;
> [info] /* 249 */       value5 = value7;
> [info] /* 250 */     } else {
> [info] /* 251 */
> [info] /* 252 */       boolean isNull8 = false;
> [info] /* 253 */       this.values = new Object[2];
> [info] /* 254 */       if (isNull1) {
> [info] /* 255 */         throw new RuntimeException(errMsg);
> [info] /* 256 */       }
> [info] /* 257 */
> [info] /* 258 */       boolean isNull11 = false;
> [info] /* 259 */       final com.tresata.spark.sql.Struct2 value11 = isNull11 ? null : (com.tresata.spark.sql.Struct2) value1.a();
> [info] /* 260 */       isNull11 = value11 == null;
> [info] /* 261 */       boolean isNull9 = false;
> [info] /* 262 */       InternalRow value9 = null;
> [info] /* 263 */       if (!false && isNull11) {
> [info] /* 264 */
> [info] /* 265 */         final InternalRow value13 = null;
> [info] /* 266 */         isNull9 = true;
> [info] /* 267 */         value9 = value13;
> [info] /* 268 */       } else {
> [info] /* 269 */
> [info] /* 270 */         boolean isNull14 = false;
> [info] /* 271 */         values1 = new Object[3];apply_0(i);
> [info] /* 272 */         apply_1(i);
> [info] /* 273 */         final InternalRow value14 = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(values1);
> [info] /* 274 */         this.values1 = null;
> [info] /* 275 */         isNull9 = isNull14;
> [info] /* 276 */         value9 = value14;
> [info] /* 277 */       }
> [info] /* 278 */       if (isNull9) {
> [info] /* 279 */         values[0] = null;
> [info] /* 280 */       } else {
> [info] /* 281 */         values[0] = value9;
> [info] /* 282 */       }
> [info] /* 283 */       if (isNull1) {
> [info] /* 284 */         throw new RuntimeException(errMsg4);
> [info] /* 285 */       }
> [info] /* 286 */
> [info] /* 287 */       boolean isNull28 = false;
> [info] /* 288 */       final com.tresata.spark.sql.Struct2 value28 = isNull28 ? null : (com.tresata.spark.sql.Struct2) value1.b();
> [info] /* 289 */       isNull28 = value28 == null;
> [info] /* 290 */       boolean isNull26 = false;
> [info] /* 291 */       InternalRow value26 = null;
> [info] /* 292 */       if (!false && isNull28) {
> [info] /* 293 */
> [info] /* 294 */         final InternalRow value30 = null;
> [info] /* 295 */         isNull26 = true;
> [info] /* 296 */         value26 = value30;
> [info] /* 297 */       } else {
> [info] /* 298 */
> [info] /* 299 */         boolean isNull31 = false;
> [info] /* 300 */         values2 = new Object[3];apply1_0(i);
> [info] /* 301 */         apply1_1(i);
> [info] /* 302 */         final InternalRow value31 = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(values2);
> [info] /* 303 */         this.values2 = null;
> [info] /* 304 */         isNull26 = isNull31;
> [info] /* 305 */         value26 = value31;
> [info] /* 306 */       }
> [info] /* 307 */       if (isNull26) {
> [info] /* 308 */         values[1] = null;
> [info] /* 309 */       } else {
> [info] /* 310 */         values[1] = value26;
> [info] /* 311 */       }
> [info] /* 312 */       final InternalRow value8 = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(values);
> [info] /* 313 */       this.values = null;
> [info] /* 314 */       isNull5 = isNull8;
> [info] /* 315 */       value5 = value8;
> [info] /* 316 */     }
> [info] /* 317 */     this.isNull_0 = isNull5;
> [info] /* 318 */     this.value_0 = value5;
> [info] /* 319 */
> [info] /* 320 */     // copy all the results into MutableRow
> [info] /* 321 */
> [info] /* 322 */     if (!this.isNull_0) {
> [info] /* 323 */       mutableRow.update(0, this.value_0);
> [info] /* 324 */     } else {
> [info] /* 325 */       mutableRow.setNullAt(0);
> [info] /* 326 */     }
> [info] /* 327 */
> [info] /* 328 */     return mutableRow;
> [info] /* 329 */   }
> [info] /* 330 */ }
> {noformat}
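>
> Note where the failure comes from: isNull1 is declared as a local variable
> inside apply() (generated line 239), but it is referenced from the split-out
> helper methods apply_0, apply_1, apply1_0, and apply1_1 (e.g. generated line
> 33, the line Janino complains about), where it is not in scope. A minimal
> sketch of the same pattern, with hypothetical names, fails with a similar
> error:
> {noformat}
> class Sketch {
>   private void helper() {
>     if (isNull1) {             // compile error: isNull1 is a local of apply(), not visible here
>       throw new RuntimeException("boom");
>     }
>   }
>
>   public Object apply(Object i) {
>     boolean isNull1 = false;   // local variable, invisible to helper()
>     helper();
>     return null;
>   }
> }
> {noformat}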



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
