[ https://issues.apache.org/jira/browse/SPARK-15154?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15273885#comment-15273885 ]
Pete Robbins edited comment on SPARK-15154 at 5/6/16 10:27 AM: --------------------------------------------------------------- [~davies] as you are the author of this code can you comment on my findings? So the issue here is that the keyGenerator returns an UnsafeRow containing Int values but the code below from LongHashedRelation.apply retrieves the key from this as a Long. The bytes in the row are on Little Endian: 01 00 00 00 00 00 00 00 on Big Endian: 00 00 00 01 00 00 00 00 By chance getInt and getLong will both return "1" on Little Endian whereas on Big Endian getInt returns "1" but get Long will return "268435456" {quote} val keyGenerator = UnsafeProjection.create(key) // Create a mapping of key -> rows var numFields = 0 while (input.hasNext) { val unsafeRow = input.next().asInstanceOf[UnsafeRow] numFields = unsafeRow.numFields() val rowKey = keyGenerator(unsafeRow) if (!rowKey.isNullAt(0)) { val key = rowKey.getLong(0) // <<<---- Values in rowKey are Int not Long map.append(key, unsafeRow) } } {quote} was (Author: robbinspg): [~davies] as you are the author of this code can you comment on my findings? So the issue here is that the keyGenerator returns an UnsafeRow containing Int values but the code below from LongHashedRelation.apply retrieves the key from this as a Long. The bytes in the row are on Little Endian: 01 00 00 00 00 00 00 00 on Big Endian: 00 00 00 01 00 00 00 00 By chance getInt and getLong will both return "1" on Little Endian whereas on Big Endian getInt returns "1" but get Long will return "268435456" ``` val keyGenerator = UnsafeProjection.create(key) // Create a mapping of key -> rows var numFields = 0 while (input.hasNext) { val unsafeRow = input.next().asInstanceOf[UnsafeRow] numFields = unsafeRow.numFields() val rowKey = keyGenerator(unsafeRow) if (!rowKey.isNullAt(0)) { val key = rowKey.getLong(0) // <<<---- Values in rowKey are Int not Long map.append(key, unsafeRow) } } ``` > LongHashedRelation fails on Big Endian platform > ----------------------------------------------- > > Key: SPARK-15154 > URL: https://issues.apache.org/jira/browse/SPARK-15154 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.0.0 > Reporter: Pete Robbins > Labels: big-endian > > NPE in > org.apache.spark.sql.execution.joins.HashedRelationSuite.LongToUnsafeRowMap > Error Message > java.lang.NullPointerException was thrown. > Stacktrace > java.lang.NullPointerException > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3$$anonfun$apply$mcV$sp$1.apply$mcVI$sp(HashedRelationSuite.scala:121) > at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply$mcV$sp(HashedRelationSuite.scala:119) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.apache.spark.sql.execution.joins.HashedRelationSuite$$anonfun$3.apply(HashedRelationSuite.scala:112) > at > org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22) > at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85) > at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > at org.scalatest.Transformer.apply(Transformer.scala:22) > at org.scalatest.Transformer.apply(Transformer.scala:20) > at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166) > at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:57) > at > org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at > org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175) > at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) > at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175) > at org.scalatest.FunSuite.runTest(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413) > at > org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401) > at scala.collection.immutable.List.foreach(List.scala:381) > at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) > at > org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396) > at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483) > at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208) > at org.scalatest.FunSuite.runTests(FunSuite.scala:1555) > at org.scalatest.Suite$class.run(Suite.scala:1424) > at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at > org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212) > at org.scalatest.SuperEngine.runImpl(Engine.scala:545) > at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212) > at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:29) > at > org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257) > at > org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256) > at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:29) > at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1492) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1528) > at > org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1526) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1526) > at > org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:29) > at org.scalatest.Suite$class.run(Suite.scala:1421) > at org.scalatest.tools.DiscoverySuite.run(DiscoverySuite.scala:29) > at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55) > at > org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563) > at > org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557) > at scala.collection.immutable.List.foreach(List.scala:381) > at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557) > at > org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044) > at > org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043) > at > org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722) > at > org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043) > at org.scalatest.tools.Runner$.main(Runner.scala:860) > at org.scalatest.tools.Runner.main(Runner.scala) > Most likely this will be caused by endianness of the bye arrays etc. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org