[jira] [Commented] (SPARK-22134) StackOverflowError issue when applying large nested UDF calls
[ https://issues.apache.org/jira/browse/SPARK-22134?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16181874#comment-16181874 ] guoxiaolongzte commented on SPARK-22134: Increase the parameters spark.driver.extraJavaOptions=‘-Xss5M’, Increase -Xss value. > StackOverflowError issue when applying large nested UDF calls > - > > Key: SPARK-22134 > URL: https://issues.apache.org/jira/browse/SPARK-22134 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.1.0 > Environment: Spark 2.1.0 on Cloudera CDH 5u8 >Reporter: Andrew Hu Zheng > > Spark throws a StackOverflowError whenever there is a large nested call of > UDFs. > I have tried increasing the memory, but the same issue still happens. > Sample code of the nested calls : > {code:java} > val v4 = > u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat($"C0_0", > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"); > {code} > stack trace > {code:java} > java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at com.informatica.compiler.InfaSparkMain$.main(InfaSparkMain.scala:74) > at com.informatica.compiler.InfaSparkMain.main(InfaSparkMain.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:637) > Caused by: java.lang.StackOverflowError > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:358) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:360) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:358) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:360) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(
[jira] [Commented] (SPARK-22134) StackOverflowError issue when applying large nested UDF calls
[ https://issues.apache.org/jira/browse/SPARK-22134?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16181782#comment-16181782 ] Andrew Hu Zheng commented on SPARK-22134: - Sean, I understand what you are trying to say. However, we had this similar example working fine in Hive. I had an inclination that Spark would be able to handle something of this magnitude. > StackOverflowError issue when applying large nested UDF calls > - > > Key: SPARK-22134 > URL: https://issues.apache.org/jira/browse/SPARK-22134 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.1.0 > Environment: Spark 2.1.0 on Cloudera CDH 5u8 >Reporter: Andrew Hu Zheng >Priority: Critical > > Spark throws a StackOverflowError whenever there is a large nested call of > UDFs. > I have tried increasing the memory, but the same issue still happens. > Sample code of the nested calls : > {code:java} > val v4 = > u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat($"C0_0", > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"); > {code} > stack trace > {code:java} > java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at com.informatica.compiler.InfaSparkMain$.main(InfaSparkMain.scala:74) > at com.informatica.compiler.InfaSparkMain.main(InfaSparkMain.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:637) > Caused by: java.lang.StackOverflowError > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:358) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:360) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:358) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.
[jira] [Commented] (SPARK-22134) StackOverflowError issue when applying large nested UDF calls
[ https://issues.apache.org/jira/browse/SPARK-22134?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16181650#comment-16181650 ] Sean Owen commented on SPARK-22134: --- I mean, if you chain enough method calls, of course this will happen. This looks like a very contrived example. I don't think this can be considered a bug. > StackOverflowError issue when applying large nested UDF calls > - > > Key: SPARK-22134 > URL: https://issues.apache.org/jira/browse/SPARK-22134 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.1.0 > Environment: Spark 2.1.0 on Cloudera CDH 5u8 >Reporter: Andrew Hu Zheng >Priority: Critical > > Spark throws a StackOverflowError whenever there is a large nested call of > UDFs. > I have tried increasing the memory, but the same issue still happens. > Sample code of the nested calls : > {code:java} > val v4 = > u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat(u_concat($"C0_0", > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), $"C0_0"), > $"C0_0"); > {code} > stack trace > {code:java} > java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at com.informatica.compiler.InfaSparkMain$.main(InfaSparkMain.scala:74) > at com.informatica.compiler.InfaSparkMain.main(InfaSparkMain.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:637) > Caused by: java.lang.StackOverflowError > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:358) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply(TreeNode.scala:360) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:358) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5$$anonfun$apply$11.apply