[jira] [Comment Edited] (SPARK-5236) java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableInt
[ https://issues.apache.org/jira/browse/SPARK-5236?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15936222#comment-15936222 ]

Jorge Machado edited comment on SPARK-5236 at 3/22/17 12:52 PM:

[~marmbrus] Hi Michael, I'm experiencing the same issue. I'm building a data source for HBase with a custom schema, on Spark 1.6.3. I traced it down to GeneratePredicate.scala, where the generated predicate is applied as (r: InternalRow) => p.eval(r); in my case it effectively does an instanceOf[MutableLong] cast on a value that was stored as a String, which fails. The query is a filter on the DataFrame followed by a groupBy/count.

{noformat}
/**
 * @param schema how the row has to look; the value returned from next() must match this schema
 * @param hBaseRelation
 * @param repositoryHistory
 * @param timeZoneId
 * @param tablePartitionInfo
 * @param from
 * @param to
 */
class TagValueSparkIterator(val hBaseRelation: HBaseRelation,
                            val schema: StructType,
                            val repositoryHistory: DeviceHistoryRepository,
                            val timeZoneId: String,
                            val tablePartitionInfo: TablePartitionInfo,
                            val from: Long,
                            val to: Long) extends Iterator[InternalRow] {

  private val internalItr: ClosableIterator[TagValue[Double]] =
    repositoryHistory.scanTagValues(from, to, tablePartitionInfo)

  override def hasNext: Boolean = internalItr.hasNext

  override def next(): InternalRow = {
    val tagValue = internalItr.next()
    val instant = ZonedDateTime.ofInstant(Instant.ofEpochSecond(tagValue.getTimestamp),
      ZoneId.of(timeZoneId)).toInstant
    val timestamp = Timestamp.from(instant)
    InternalRow.fromSeq(Array(tagValue.getTimestamp, tagValue.getGuid, tagValue.getGuid, tagValue.getValue))
    val mutableRow = new SpecificMutableRow(schema.fields.map(f => f.dataType))
    for (i <- schema.fields.indices) {
      updateMutableRow(i, tagValue, mutableRow, schema(i))
    }
    mutableRow
  }

  def updateMutableRow(i: Int, tagValue: TagValue[Double], row: SpecificMutableRow, field: StructField): Unit = {
    // #TODO this is ugly.
    field.name match {
      case "Date"     => row.setLong(i, tagValue.getTimestamp.toLong)
      case "Device"   => row.update(i, UTF8String.fromString(tagValue.getGuid))
      case "Tag"      => row.update(i, UTF8String.fromString(tagValue.getTagName))
      case "TagValue" => row.setDouble(i, tagValue.getValue)
    }
  }

  override def toString(): String =
    "Iterator for Region Name " + tablePartitionInfo.getRegionName + " Range: " + from + " until " + to
}
{noformat}

Then I get:

{noformat}
Driver stacktrace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableLong
	at org.apache.spark.sql.catalyst.expressions.SpecificMutableRow.getLong(SpecificMutableRow.scala:301)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificPredicate.eval(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$$anonfun$create$2.apply(GeneratePredicate.scala:68)
	at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$$anonfun$create$2.apply(GeneratePredicate.scala:68)
	at org.apache.spark.sql.execution.Filter$$anonfun$2$$anonfun$apply$2.apply(basicOperators.scala:74)
	at org.apache.spark.sql.execution.Filter$$anonfun$2$$anonfun$apply$2.apply(basicOperators.scala:72)
	at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:390)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
{noformat}
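The cast shows up when the mutable value stored at an ordinal does not match what the generated predicate reads back there: SpecificMutableRow backs non-primitive DataTypes with MutableAny, so a getLong on such a slot throws exactly this ClassCastException. One way to keep them consistent is to dispatch on the declared field.dataType instead of the field name. A minimal sketch of that idea, assuming the schema declares LongType or TimestampType for "Date", StringType for "Device"/"Tag" and DoubleType for "TagValue"; TagValue and its accessors come from the snippet above, and the microsecond conversion assumes getTimestamp is in epoch seconds (per the Instant.ofEpochSecond call). This is an illustrative variant, not the fix actually applied here:

{noformat}
import org.apache.spark.sql.catalyst.expressions.SpecificMutableRow
import org.apache.spark.sql.types.{DoubleType, LongType, StringType, StructField, TimestampType}
import org.apache.spark.unsafe.types.UTF8String

// Hypothetical variant of updateMutableRow: dispatch on the declared DataType,
// not the field name, so the value written matches what the generated
// predicate will read back (getLong for LongType/TimestampType, getDouble for DoubleType, ...).
def updateMutableRow(i: Int, tagValue: TagValue[Double],
                     row: SpecificMutableRow, field: StructField): Unit =
  field.dataType match {
    case LongType      => row.setLong(i, tagValue.getTimestamp)            // read back via getLong
    case TimestampType => row.setLong(i, tagValue.getTimestamp * 1000000L) // internal representation: microseconds since epoch
    case DoubleType    => row.setDouble(i, tagValue.getValue)              // read back via getDouble
    case StringType    =>                                                  // strings must be stored as UTF8String
      val s = if (field.name == "Tag") tagValue.getTagName else tagValue.getGuid
      row.update(i, UTF8String.fromString(s))
    case other         => sys.error(s"Unsupported type $other for field ${field.name}")
  }
{noformat}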