[jira] [Assigned] (SPARK-32380) sparksql cannot access hive table while data in hbase
[ https://issues.apache.org/jira/browse/SPARK-32380?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Apache Spark reassigned SPARK-32380: Assignee: Apache Spark > sparksql cannot access hive table while data in hbase > - > > Key: SPARK-32380 > URL: https://issues.apache.org/jira/browse/SPARK-32380 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 3.0.0 > Environment: ||component||version|| > |hadoop|2.8.5| > |hive|2.3.7| > |spark|3.0.0| > |hbase|1.4.9| >Reporter: deyzhong >Assignee: Apache Spark >Priority: Major > Original Estimate: 72h > Remaining Estimate: 72h > > * step1: create hbase table > {code:java} > hbase(main):001:0>create 'hbase_test1', 'cf1' > hbase(main):001:0> put 'hbase_test', 'r1', 'cf1:c1', '123' > {code} > * step2: create hive table related to hbase table > > {code:java} > hive> > CREATE EXTERNAL TABLE `hivetest.hbase_test`( > `key` string COMMENT '', > `value` string COMMENT '') > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY > 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'hbase.columns.mapping'=':key,cf1:v1', > 'serialization.format'='1') > TBLPROPERTIES ( > 'hbase.table.name'='hbase_test') > {code} > * step3: sparksql query hive table while data in hbase > {code:java} > spark-sql --master yarn -e "select * from hivetest.hbase_test" > {code} > > The error log is as follows: > java.io.IOException: Cannot create a record reader because of a previous > error. Please look at the previous logs lines from the task's full log for > more details. 
> at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:270) > at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:131) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158) > at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1004) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:388) > at org.apache.spark.rdd.RDD.collect(RDD.scala:1003) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:385) > at > org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:412) > at > 
org.apache.spark.sql.execution.HiveResult$.hiveResultString(HiveResult.scala:58) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.$anonfun$run$1(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:377) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:496) > at scala.collection.Iterator.foreach(Iterator.scala:941) > at scala.collection.Iterator.foreach$(Iterator.scala:941) > at
[jira] [Assigned] (SPARK-32380) sparksql cannot access hive table while data in hbase
[ https://issues.apache.org/jira/browse/SPARK-32380?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Apache Spark reassigned SPARK-32380: Assignee: (was: Apache Spark) > sparksql cannot access hive table while data in hbase > - > > Key: SPARK-32380 > URL: https://issues.apache.org/jira/browse/SPARK-32380 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 3.0.0 > Environment: ||component||version|| > |hadoop|2.8.5| > |hive|2.3.7| > |spark|3.0.0| > |hbase|1.4.9| >Reporter: deyzhong >Priority: Major > Original Estimate: 72h > Remaining Estimate: 72h > > * step1: create hbase table > {code:java} > hbase(main):001:0>create 'hbase_test1', 'cf1' > hbase(main):001:0> put 'hbase_test', 'r1', 'cf1:c1', '123' > {code} > * step2: create hive table related to hbase table > > {code:java} > hive> > CREATE EXTERNAL TABLE `hivetest.hbase_test`( > `key` string COMMENT '', > `value` string COMMENT '') > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY > 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'hbase.columns.mapping'=':key,cf1:v1', > 'serialization.format'='1') > TBLPROPERTIES ( > 'hbase.table.name'='hbase_test') > {code} > * step3: sparksql query hive table while data in hbase > {code:java} > spark-sql --master yarn -e "select * from hivetest.hbase_test" > {code} > > The error log is as follows: > java.io.IOException: Cannot create a record reader because of a previous > error. Please look at the previous logs lines from the task's full log for > more details. 
> at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:270) > at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:131) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158) > at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1004) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:388) > at org.apache.spark.rdd.RDD.collect(RDD.scala:1003) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:385) > at > org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:412) > at > 
org.apache.spark.sql.execution.HiveResult$.hiveResultString(HiveResult.scala:58) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.$anonfun$run$1(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:377) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:496) > at scala.collection.Iterator.foreach(Iterator.scala:941) > at scala.collection.Iterator.foreach$(Iterator.scala:941) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) > at
[jira] [Assigned] (SPARK-32380) sparksql cannot access hive table while data in hbase
[ https://issues.apache.org/jira/browse/SPARK-32380?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Apache Spark reassigned SPARK-32380: Assignee: Apache Spark > sparksql cannot access hive table while data in hbase > - > > Key: SPARK-32380 > URL: https://issues.apache.org/jira/browse/SPARK-32380 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 3.0.0 > Environment: ||component||version|| > |hadoop|2.8.5| > |hive|2.3.7| > |spark|3.0.0| > |hbase|1.4.9| >Reporter: deyzhong >Assignee: Apache Spark >Priority: Major > Original Estimate: 72h > Remaining Estimate: 72h > > * step1: create hbase table > {code:java} > hbase(main):001:0>create 'hbase_test1', 'cf1' > hbase(main):001:0> put 'hbase_test', 'r1', 'cf1:c1', '123' > {code} > * step2: create hive table related to hbase table > > {code:java} > hive> > CREATE EXTERNAL TABLE `hivetest.hbase_test`( > `key` string COMMENT '', > `value` string COMMENT '') > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY > 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'hbase.columns.mapping'=':key,cf1:v1', > 'serialization.format'='1') > TBLPROPERTIES ( > 'hbase.table.name'='hbase_test') > {code} > * step3: sparksql query hive table while data in hbase > {code:java} > spark-sql --master yarn -e "select * from hivetest.hbase_test" > {code} > > The error log is as follows: > java.io.IOException: Cannot create a record reader because of a previous > error. Please look at the previous logs lines from the task's full log for > more details. 
> at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:270) > at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:131) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158) > at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1004) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:388) > at org.apache.spark.rdd.RDD.collect(RDD.scala:1003) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:385) > at > org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:412) > at > 
org.apache.spark.sql.execution.HiveResult$.hiveResultString(HiveResult.scala:58) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.$anonfun$run$1(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:377) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:496) > at scala.collection.Iterator.foreach(Iterator.scala:941) > at scala.collection.Iterator.foreach$(Iterator.scala:941) > at
[jira] [Assigned] (SPARK-32380) sparksql cannot access hive table while data in hbase
[ https://issues.apache.org/jira/browse/SPARK-32380?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Apache Spark reassigned SPARK-32380: Assignee: (was: Apache Spark) > sparksql cannot access hive table while data in hbase > - > > Key: SPARK-32380 > URL: https://issues.apache.org/jira/browse/SPARK-32380 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 3.0.0 > Environment: ||component||version|| > |hadoop|2.8.5| > |hive|2.3.7| > |spark|3.0.0| > |hbase|1.4.9| >Reporter: deyzhong >Priority: Major > Original Estimate: 72h > Remaining Estimate: 72h > > * step1: create hbase table > {code:java} > hbase(main):001:0>create 'hbase_test1', 'cf1' > hbase(main):001:0> put 'hbase_test', 'r1', 'cf1:c1', '123' > {code} > * step2: create hive table related to hbase table > > {code:java} > hive> > CREATE EXTERNAL TABLE `hivetest.hbase_test`( > `key` string COMMENT '', > `value` string COMMENT '') > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY > 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'hbase.columns.mapping'=':key,cf1:v1', > 'serialization.format'='1') > TBLPROPERTIES ( > 'hbase.table.name'='hbase_test') > {code} > * step3: sparksql query hive table while data in hbase > {code:java} > spark-sql --master yarn -e "select * from hivetest.hbase_test" > {code} > > The error log is as follows: > java.io.IOException: Cannot create a record reader because of a previous > error. Please look at the previous logs lines from the task's full log for > more details. 
> at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:270) > at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:131) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) > at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276) > at scala.Option.getOrElse(Option.scala:189) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:272) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158) > at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1004) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:388) > at org.apache.spark.rdd.RDD.collect(RDD.scala:1003) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:385) > at > org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:412) > at > 
org.apache.spark.sql.execution.HiveResult$.hiveResultString(HiveResult.scala:58) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.$anonfun$run$1(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:377) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:496) > at scala.collection.Iterator.foreach(Iterator.scala:941) > at scala.collection.Iterator.foreach$(Iterator.scala:941) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) > at