wForget commented on code in PR #47303: URL: https://github.com/apache/spark/pull/47303#discussion_r1675201152
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -2685,18 +2685,32 @@ case class Chr(child: Expression)
 case class Base64(child: Expression)
   extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant {
 
+  lazy val chunkBase64: Boolean = SQLConf.get.chunkBase64StringEnabled
+  lazy val encoder: JBase64.Encoder = if (chunkBase64) {
+    JBase64.getMimeEncoder
+  } else {
+    JBase64.getMimeEncoder(-1, Array())
+  }
+
   override def dataType: DataType = SQLConf.get.defaultStringType
   override def inputTypes: Seq[DataType] = Seq(BinaryType)
 
   protected override def nullSafeEval(bytes: Any): Any = {
-    UTF8String.fromBytes(JBase64.getMimeEncoder.encode(bytes.asInstanceOf[Array[Byte]]))
+    UTF8String.fromBytes(encoder.encode(bytes.asInstanceOf[Array[Byte]]))
   }
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     nullSafeCodeGen(ctx, ev, (child) => {
-      s"""${ev.value} = UTF8String.fromBytes(
-            ${classOf[JBase64].getName}.getMimeEncoder().encode($child));
-       """})
+      if (chunkBase64) {
+        s"""${ev.value} = UTF8String.fromBytes(
+            ${classOf[JBase64].getName}.getMimeEncoder().encode($child));

Review Comment:
   > Why don't we use the encoder directly?

   `java.util.Base64$Encoder` is not serializable.
   ```
   -- !query
   select base64(c7), base64(c8), base64(v), ascii(s) from char_tbl4
   -- !query schema
   struct<>
   -- !query output
   java.io.NotSerializableException
   java.util.Base64$Encoder
   Serialization stack:
        - object not serializable (class: java.util.Base64$Encoder, value: java.util.Base64$Encoder@423ed07f)
        - element of array (index: 2)
        - array (class [Ljava.lang.Object;, size 5)
        - field (class: org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory, name: org$apache$spark$sql$execution$WholeStageCodegenEvaluatorFactory$$references, type: class [Ljava.lang.Object;)
        - object (class org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory, org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory@2fd9633e)
        - element of array (index: 0)
        - array (class [Ljava.lang.Object;, size 1)
        - field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
        - object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.execution.WholeStageCodegenExec, functionalInterfaceMethod=scala/Function2.apply:(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/execution/WholeStageCodegenExec.$anonfun$doExecute$4$adapted:(Lorg/apache/spark/sql/execution/WholeStageCodegenEvaluatorFactory;Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, instantiatedMethodType=(Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, numCaptured=1])
        - writeReplace data (class: java.lang.invoke.SerializedLambda)
        - object (class org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2458/0x000002cf3e949c30, org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2458/0x000002cf3e949c30@603a0fa7)
   ```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org