Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/13909#discussion_r93814696 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala --- @@ -56,33 +58,81 @@ case class CreateArray(children: Seq[Expression]) extends Expression { } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val arrayClass = classOf[GenericArrayData].getName - val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"this.$values = null;") - - ev.copy(code = s""" - this.$values = new Object[${children.size}];""" + - ctx.splitExpressions( - ctx.INPUT_ROW, - children.zipWithIndex.map { case (e, i) => - val eval = e.genCode(ctx) - eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - } - """ - }) + - s""" - final ArrayData ${ev.value} = new $arrayClass($values); - this.$values = null; - """, isNull = "false") + val array = ctx.freshName("array") + + val et = dataType.elementType + val evals = children.map(e => e.genCode(ctx)) + val isPrimitiveArray = ctx.isPrimitiveType(et) + val (preprocess, arrayData) = + GenArrayData.getCodeArrayData(ctx, et, children.size, isPrimitiveArray, array) + + val assigns = if (isPrimitiveArray) { + val primitiveTypeName = ctx.primitiveTypeName(et) + evals.zipWithIndex.map { case (eval, i) => + eval.code + s""" + if (${eval.isNull}) { + $arrayData.setNullAt($i); + } else { + $arrayData.set$primitiveTypeName($i, ${eval.value}); + } + """ + } + } else { + evals.zipWithIndex.map { case (eval, i) => + eval.code + s""" + if (${eval.isNull}) { + $array[$i] = null; + } else { + $array[$i] = ${eval.value}; + } + """ + } + } + ev.copy(code = --- End diff -- @cloud-fan I love this idea. However, when I have just implemented this, I hit the following problem. Janino throws an exception. IMHO, [this part](https://github.com/janino-compiler/janino/blob/janino_3.0.0/janino/src/org/codehaus/janino/UnitCompiler.java#L4331-L4348) should optimize without throwing an exception. We may have some options 1. remove `if (... instanceof ...)` for projection in Spark 2. submit a PR to janino now to throw this exception and wait for this change until the new janino with this PR will be available 3. submit a PR to janino now to throw this exception and postpone this change later 4. others what do you think? ```java ... /* 037 */ UTF8String value7 = (UTF8String) obj5; /* 038 */ if (false) { /* 039 */ array1[0] = null; /* 040 */ } else { /* 041 */ array1[0] = value7; /* 042 */ } ... /* 068 */ Object obj9 = ((Expression) references[9]).eval(null); /* 069 */ UTF8String value11 = (UTF8String) obj9; /* 070 */ if (false) { /* 071 */ array1[4] = null; /* 072 */ } else { /* 073 */ array1[4] = value11; /* 074 */ } /* 075 */ org.apache.spark.sql.catalyst.util.GenericArrayData arrayData1 = new org.apache.spark.sql.catalyst.util.GenericArrayData(array1); /* 076 */ // Remember the current cursor so that we can calculate how many bytes are /* 077 */ // written later. /* 078 */ final int tmpCursor2 = holder.cursor; /* 079 */ /* 080 */ if (arrayData1 instanceof UnsafeArrayData) { /* 081 */ /* 082 */ final int sizeInBytes1 = ((UnsafeArrayData) arrayData1).getSizeInBytes(); /* 083 */ // grow the global buffer before writing data. /* 084 */ holder.grow(sizeInBytes1); /* 085 */ ((UnsafeArrayData) arrayData1).writeToMemory(holder.buffer, holder.cursor); /* 086 */ holder.cursor += sizeInBytes1; /* 087 */ /* 088 */ } else { ... org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 80, Column 26: "org.apache.spark.sql.catalyst.util.GenericArrayData" can never be an instance of "org.apache.spark.sql.catalyst.expressions.UnsafeArrayData" at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:11004) at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4345) at org.codehaus.janino.UnitCompiler.access$7400(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$12.visitInstanceof(UnitCompiler.java:3773) at org.codehaus.janino.UnitCompiler$12.visitInstanceof(UnitCompiler.java:3762) at org.codehaus.janino.Java$Instanceof.accept(Java.java:4074) at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:3762) at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:4933) at org.codehaus.janino.UnitCompiler.compileBoolean2(UnitCompiler.java:3369) at org.codehaus.janino.UnitCompiler.access$5400(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$10.visitInstanceof(UnitCompiler.java:3335) at org.codehaus.janino.UnitCompiler$10.visitInstanceof(UnitCompiler.java:3324) at org.codehaus.janino.Java$Instanceof.accept(Java.java:4074) at org.codehaus.janino.UnitCompiler.compileBoolean(UnitCompiler.java:3324) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2216) at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378) at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370) at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370) at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2811) at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262) at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894) at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377) at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369) at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564) at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:420) at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:206) at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:374) at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:369) at org.codehaus.janino.Java$AbstractPackageMemberClassDeclaration.accept(Java.java:1309) at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369) at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:345) at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:396) at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:311) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:196) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:91) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:935) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257) at com.google.common.cache.LocalCache.get(LocalCache.java:4000) at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4004) at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:890) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:405) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:359) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:32) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:874) at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.create(Projection.scala:130) at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper$$anonfun$2.apply(ExpressionEvalHelper.scala:158) at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper$$anonfun$2.apply(ExpressionEvalHelper.scala:158) at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper$class.generateProject(ExpressionEvalHelper.scala:105) at org.apache.spark.sql.catalyst.expressions.ComplexTypeSuite.generateProject(ComplexTypeSuite.scala:27) at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper$class.checkEvalutionWithUnsafeProjection(ExpressionEvalHelper.scala:157) at org.apache.spark.sql.catalyst.expressions.ComplexTypeSuite.checkEvalutionWithUnsafeProjection(ComplexTypeSuite.scala:27) at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper$class.checkEvaluation(ExpressionEvalHelper.scala:53) at org.apache.spark.sql.catalyst.expressions.ComplexTypeSuite.checkEvaluation(ComplexTypeSuite.scala:27) at org.apache.spark.sql.catalyst.expressions.ComplexTypeSuite$$anonfun$5.apply$mcV$sp(ComplexTypeSuite.scala:128) at org.apache.spark.sql.catalyst.expressions.ComplexTypeSuite$$anonfun$5.apply(ComplexTypeSuite.scala:120) at org.apache.spark.sql.catalyst.expressions.ComplexTypeSuite$$anonfun$5.apply(ComplexTypeSuite.scala:120) at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:68) at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) at org.scalatest.FunSuite.runTest(FunSuite.scala:1555) at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413) at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401) at scala.collection.immutable.List.foreach(List.scala:381) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208) at org.scalatest.FunSuite.runTests(FunSuite.scala:1555) at org.scalatest.Suite$class.run(Suite.scala:1424) at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555) at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) at org.scalatest.SuperEngine.runImpl(Engine.scala:545) at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:31) at org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257) at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:31) at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55) at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563) at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557) at scala.collection.immutable.List.foreach(List.scala:381) at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557) at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044) at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043) at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722) at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043) at org.scalatest.tools.Runner$.run(Runner.scala:883) at org.scalatest.tools.Runner.run(Runner.scala) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147) ```
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org