The dictionary build uses the DictEncode expression. The three child Expressions it takes are wired up, at instantiation, through TernaryLike, a trait custom-defined in TreeNode of the bundled Spark:
case class DictEncode(left: Expression, mid: Expression, right: Expression)
  extends TernaryExpression with ExpectsInputTypes
  with TernaryLike {

  def maxFields: Int = SQLConf.get.maxToStringFields

  // The custom trait derives children from these three accessors, so each
  // one must be overridden to point at a constructor field.
  override def first: Expression = left
  override def second: Expression = mid
  override def third: Expression = right
  // ...
}
If these overrides are removed, the code still compiles, but the dictionary build then fails with a NullPointerException.
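To make the failure mode concrete, here is a minimal sketch of what the custom trait presumably looks like. It mirrors the TernaryLike that community Spark only introduced in 3.2, except that the accessors are given default implementations here; that part is our assumption, made because it is what would let DictEncode still compile once the overrides are dropped:

import org.apache.spark.sql.catalyst.trees.TreeNode

// Sketch only: the actual trait lives inside TreeNode.scala of the bundled
// spark-catalyst_2.12-3.1.1.1.1.0 jar, and its exact body is not shown here.
trait TernaryLike[T <: TreeNode[T]] { self: T =>
  // Assumed defaults; community Spark 3.2 declares these abstract instead.
  def first: T = null.asInstanceOf[T]
  def second: T = null.asInstanceOf[T]
  def third: T = null.asInstanceOf[T]

  // children is derived from the three accessors above. When DictEncode no
  // longer overrides them, the tree's real children are lost, and
  // TreeNode.containsChild (children.toSet at TreeNode.scala:121) is what
  // raises the NullPointerException in the trace below.
  @transient override final lazy val children: Seq[T] = first :: second :: third :: Nil
}

The trace from our failed build job: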
2023-12-06T03:50:58,896 ERROR [logger-thread-0] application.JobMonitor : handleResourceLack --> java.lang.NullPointerException
java.lang.RuntimeException: Error execute org.apache.kylin.engine.spark.job.SegmentBuildJob
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:135) ~[newten-job.jar:?]
    at org.apache.spark.application.JobWorker$$anon$2.run(JobWorker.scala:56) ~[newten-job.jar:?]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_202]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_202]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_202]
Caused by: java.lang.NullPointerException
    at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild$lzycompute(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.containsChild(TreeNode.scala:121) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:407) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:405) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:358) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:306) ~[spark-catalyst_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.Column.normalizedExpr(Column.scala:161) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at org.apache.spark.sql.Column.hashCode(Column.scala:159) ~[spark-sql_2.12-3.1.1.1.1.0.jar:3.1.1.1.1.0]
    at scala.runtime.Statics.anyHash(Statics.java:122) ~[scala-library-2.12.17.jar:?]
    at scala.util.hashing.MurmurHash3.productHash(MurmurHash3.scala:76) ~[scala-library-2.12.17.jar:?]
    at scala.util.hashing.MurmurHash3$.productHash(MurmurHash3.scala:246) ~[scala-library-2.12.17.jar:?]
    at scala.runtime.ScalaRunTime$._hashCode(ScalaRunTime.scala:167) ~[scala-library-2.12.17.jar:?]
    at scala.Tuple5.hashCode(Tuple5.scala:27) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addEntry(FlatHashTable.scala:153) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addEntry$(FlatHashTable.scala:152) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.addEntry(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addElem(FlatHashTable.scala:144) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.FlatHashTable.addElem$(FlatHashTable.scala:143) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.addElem(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:60) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.HashSet.$plus$eq(HashSet.scala:41) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:32) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.GrowingBuilder.$plus$eq(GrowingBuilder.scala:30) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.map(TraversableLike.scala:286) ~[scala-library-2.12.17.jar:?]
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.AbstractSet.scala$collection$SetLike$$super$map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
    at scala.collection.SetLike.map(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
    at scala.collection.SetLike.map$(SetLike.scala:105) ~[scala-library-2.12.17.jar:?]
    at scala.collection.mutable.AbstractSet.map(Set.scala:50) ~[scala-library-2.12.17.jar:?]
    at org.apache.kylin.engine.spark.builder.DFTableEncoder$.encodeTable(DFTableEncoder.scala:62) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.encodeColumn(FlatTableAndDictBase.scala:569) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:545) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.FlatTableAndDictBase.buildDictIfNeed(FlatTableAndDictBase.scala:201) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.BuildDict.execute(BuildDict.scala:31) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.StageExec.toWork(StageExec.scala:116) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.StageExec.toWork$(StageExec.scala:112) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.stage.build.BuildStage.toWork(BuildStage.scala:48) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1(BuildExec.scala:38) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.$anonfun$buildSegment$1$adapted(BuildExec.scala:37) ~[newten-job.jar:?]
    at scala.collection.Iterator.foreach(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.Iterator.foreach$(Iterator.scala:943) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach(IterableLike.scala:74) ~[scala-library-2.12.17.jar:?]
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73) ~[scala-library-2.12.17.jar:?]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56) ~[scala-library-2.12.17.jar:?]
    at org.apache.kylin.engine.spark.job.exec.BuildExec.buildSegment(BuildExec.scala:37) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.buildSegment(SegmentBuildJob.java:181) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.lambda$build$1(SegmentBuildJob.java:166) ~[newten-job.jar:?]
    at java.util.Iterator.forEachRemaining(Iterator.java:116) ~[?:1.8.0_202]
    at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801) ~[?:1.8.0_202]
    at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) ~[?:1.8.0_202]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.build(SegmentBuildJob.java:146) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.job.SegmentBuildJob.doExecute(SegmentBuildJob.java:108) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:319) ~[newten-job.jar:?]
    at org.apache.kylin.engine.spark.application.SparkApplication.execute(SparkApplication.java:133) ~[newten-job.jar:?]
    ... 4 more
From: MINGMING GE <[email protected]>
Date: Monday, December 25, 2023, 19:17
To: [email protected] <[email protected]>
Subject: Re: Question about the custom Spark build
Can you explain in detail the problem you encountered?
On Mon, Dec 25, 2023 at 4:50 PM Li, Can <[email protected]> wrote:
> After we replaced the bundled Spark with the community release, we found that some build jobs fail.
> Comparing community Spark against the Spark build shipped with Kylin, we found that, where the global
> dictionary is built, the Kylin build defines some custom interface methods inside the Spark jars. Could these be implemented at the application layer instead?
>
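A minimal sketch of the application-layer alternative asked about above, assuming stock community Spark 3.1.x (which ships neither TernaryLike nor first/second/third): the expression can expose its children directly through the children override that TernaryExpression already expects from subclasses. Everything below apart from that override is a placeholder, not the actual Kylin implementation:

import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, TernaryExpression}
import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType}

case class DictEncode(left: Expression, mid: Expression, right: Expression)
  extends TernaryExpression with ExpectsInputTypes {

  // On community Spark 3.1.x, children is abstract on Expression, so the
  // three child expressions can be wired here with no custom trait at all.
  override def children: Seq[Expression] = Seq(left, mid, right)

  // Placeholder signatures; the real expression's types apply here.
  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType)
  override def dataType: DataType = StringType

  // Placeholder evaluation; the real dictionary-encoding logic goes here.
  override protected def nullSafeEval(l: Any, m: Any, r: Any): Any = l
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    defineCodeGen(ctx, ev, (a, _, _) => a)
}

If this works against stock Spark, the custom interface methods would no longer need to ship inside the spark-catalyst jar, which is exactly the decoupling the question asks about.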