[jira] [Commented] (SPARK-15154) LongHashedRelation fails on Big Endian platform
[ https://issues.apache.org/jira/browse/SPARK-15154?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15276552#comment-15276552 ] Apache Spark commented on SPARK-15154: -- User 'robbinspg' has created a pull request for this issue: https://github.com/apache/spark/pull/13009 > LongHashedRelation fails on Big Endian platform > --- > > Key: SPARK-15154 > URL: https://issues.apache.org/jira/browse/SPARK-15154 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.0.0 >Reporter: Pete Robbins > Labels: big-endian > > NPE in > org.apache.spark.sql.execution.joins.HashedRelationSuite.LongToUnsafeRowMap > Error Message > java.lang.NullPointerException was thrown. > Stacktrace > java.lang.NullPointerException > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3$$anonfun$apply$mcV$sp$1.apply$mcVI$sp(HashedRelationSuite.scala:121) > at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply$mcV$sp(HashedRelationSuite.scala:119) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) > at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) > at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > at org.scalatest.Transformer.apply(Transformer.scala:22) > at org.scalatest.Transformer.apply(Transformer.scala:20) > at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) > at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:57) > at > org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) > at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) > at org.scalatest.FunSuite.runTest(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401) > at scala.collection.immutable.List.foreach(List.scala:381) > at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) > at > org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396) > at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) > at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208) > at org.scalatest.FunSuite.runTests(FunSuite.scala:1555) > at org.scalatest.Suite$class.run(Suite.scala:1424) > at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at org.scalatest.SuperEngine.runImpl(Engine.scala:545) > at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212) > at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:29) > at > org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257) > at > org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256) > at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:29) > at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1492) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1528) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1526) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1526) > at > org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:29) > at org.scalatest.Suite$class.run(Suite.scala:1421) > at org.scalatest.tools.DiscoverySuite.run(DiscoverySuite.scala:29) > at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55) > at >
[jira] [Commented] (SPARK-15154) LongHashedRelation fails on Big Endian platform
[ https://issues.apache.org/jira/browse/SPARK-15154?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15276543#comment-15276543 ] Pete Robbins commented on SPARK-15154: -- I'm convinced the test is invalid. The creation of LongHashedRelation is guarded by {code} if (key.length == 1 && key.head.dataType == LongType) { LongHashedRelation(input, key, sizeEstimate, mm) } {code} In this failing test the key dataType is IntegerType I'll submit a PR to fix the tests > LongHashedRelation fails on Big Endian platform > --- > > Key: SPARK-15154 > URL: https://issues.apache.org/jira/browse/SPARK-15154 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.0.0 >Reporter: Pete Robbins > Labels: big-endian > > NPE in > org.apache.spark.sql.execution.joins.HashedRelationSuite.LongToUnsafeRowMap > Error Message > java.lang.NullPointerException was thrown. > Stacktrace > java.lang.NullPointerException > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3$$anonfun$apply$mcV$sp$1.apply$mcVI$sp(HashedRelationSuite.scala:121) > at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply$mcV$sp(HashedRelationSuite.scala:119) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) > at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) > at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > at org.scalatest.Transformer.apply(Transformer.scala:22) > at org.scalatest.Transformer.apply(Transformer.scala:20) > at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) > at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:57) > at > org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) > at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) > at org.scalatest.FunSuite.runTest(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401) > at scala.collection.immutable.List.foreach(List.scala:381) > at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) > at > org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396) > at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) > at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208) > at org.scalatest.FunSuite.runTests(FunSuite.scala:1555) > at org.scalatest.Suite$class.run(Suite.scala:1424) > at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at org.scalatest.SuperEngine.runImpl(Engine.scala:545) > at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212) > at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:29) > at > org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257) > at > org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256) > at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:29) > at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1492) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1528) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1526) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1526) > at > org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:29) > at
[jira] [Commented] (SPARK-15154) LongHashedRelation fails on Big Endian platform
[ https://issues.apache.org/jira/browse/SPARK-15154?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15273897#comment-15273897 ] Pete Robbins commented on SPARK-15154: -- Is this just a testcase issue where in HashedRelationSuite {code} val key = Seq(BoundReference(0, IntegerType, false)) {code} should be {code} val key = Seq(BoundReference(0, LongType, false)) {code} > LongHashedRelation fails on Big Endian platform > --- > > Key: SPARK-15154 > URL: https://issues.apache.org/jira/browse/SPARK-15154 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.0.0 >Reporter: Pete Robbins > Labels: big-endian > > NPE in > org.apache.spark.sql.execution.joins.HashedRelationSuite.LongToUnsafeRowMap > Error Message > java.lang.NullPointerException was thrown. > Stacktrace > java.lang.NullPointerException > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3$$anonfun$apply$mcV$sp$1.apply$mcVI$sp(HashedRelationSuite.scala:121) > at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply$mcV$sp(HashedRelationSuite.scala:119) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) > at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) > at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > at org.scalatest.Transformer.apply(Transformer.scala:22) > at org.scalatest.Transformer.apply(Transformer.scala:20) > at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) > at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:57) > at > org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) > at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) > at org.scalatest.FunSuite.runTest(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401) > at scala.collection.immutable.List.foreach(List.scala:381) > at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) > at > org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396) > at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) > at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208) > at org.scalatest.FunSuite.runTests(FunSuite.scala:1555) > at org.scalatest.Suite$class.run(Suite.scala:1424) > at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at org.scalatest.SuperEngine.runImpl(Engine.scala:545) > at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212) > at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:29) > at > org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257) > at > org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256) > at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:29) > at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1492) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1528) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1526) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1526) > at > org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:29) > at org.scalatest.Suite$class.run(Suite.scala:1421) > at org.scalatest.tools.DiscoverySuite.run(DiscoverySuite.scala:29) >
[jira] [Commented] (SPARK-15154) LongHashedRelation fails on Big Endian platform
[ https://issues.apache.org/jira/browse/SPARK-15154?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15273885#comment-15273885 ] Pete Robbins commented on SPARK-15154: -- [~davies] as you are the author of this code can you comment on my findings? So the issue here is that the keyGenerator returns an UnsafeRow containing Int values but the code below from LongHashedRelation.apply retrieves the key from this as a Long. The bytes in the row are on Little Endian: 01 00 00 00 00 00 00 00 on Big Endian: 00 00 00 01 00 00 00 00 By chance getInt and getLong will both return "1" on Little Endian whereas on Big Endian getInt returns "1" but get Long will return "268435456" val keyGenerator = UnsafeProjection.create(key) // Create a mapping of key -> rows var numFields = 0 while (input.hasNext) { val unsafeRow = input.next().asInstanceOf[UnsafeRow] numFields = unsafeRow.numFields() val rowKey = keyGenerator(unsafeRow) if (!rowKey.isNullAt(0)) { val key = rowKey.getLong(0) // <<< Values in rowKey are Int not Long map.append(key, unsafeRow) } } > LongHashedRelation fails on Big Endian platform > --- > > Key: SPARK-15154 > URL: https://issues.apache.org/jira/browse/SPARK-15154 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 2.0.0 >Reporter: Pete Robbins > Labels: big-endian > > NPE in > org.apache.spark.sql.execution.joins.HashedRelationSuite.LongToUnsafeRowMap > Error Message > java.lang.NullPointerException was thrown. > Stacktrace > java.lang.NullPointerException > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3$$anonfun$apply$mcV$sp$1.apply$mcVI$sp(HashedRelationSuite.scala:121) > at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply$mcV$sp(HashedRelationSuite.scala:119) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) > at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) > at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > at org.scalatest.Transformer.apply(Transformer.scala:22) > at org.scalatest.Transformer.apply(Transformer.scala:20) > at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) > at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:57) > at > org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) > at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) > at org.scalatest.FunSuite.runTest(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401) > at scala.collection.immutable.List.foreach(List.scala:381) > at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) > at > org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396) > at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) > at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208) > at org.scalatest.FunSuite.runTests(FunSuite.scala:1555) > at org.scalatest.Suite$class.run(Suite.scala:1424) > at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at org.scalatest.SuperEngine.runImpl(Engine.scala:545) > at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212) > at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:29) > at > org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257) > at > org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256) > at