[ https://issues.apache.org/jira/browse/SPARK-12378?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15387889#comment-15387889 ]
Chandana Sapparapu edited comment on SPARK-12378 at 7/24/16 3:46 PM: --------------------------------------------------------------------- UPDATE - For temporary table, I don't have this issue. For external table - the following syntax seems to work. FROM (select a.* ,b.* from TBL_A a , TBL_B b where a.id=b.id) INPUT insert overwrite TABLE MY_EXTERNAL_TABLE PARTITION (PARTITION_DT='2016-07-24' , SUB_PARTITION_DT='2016-07-24') select * where flg_1='N'; -------------------------------------------------------------------------- Environment: Spark 1.6.2 AWS EMR 4.7.2 Hive execution 1.2.1 Metastore: 1.0.0 Query : insert into table (managed) select a.*, b.* from external_table1 a LEFT OUTER JOIN external_table2 b on a.id=b.id; The statement succeeds and some data is written into this managed table, but it throws the following exception which stops the subsequent statements from getting executed. java.lang.reflect.InvocationTargetException at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.sql.hive.client.Shim_v0_14.loadTable(HiveShim.scala:442) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply$mcV$sp(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290) at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237) at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236) at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279) at org.apache.spark.sql.hive.client.ClientWrapper.loadTable(ClientWrapper.scala:556) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult$lzycompute(InsertIntoHiveTable.scala:256) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult(InsertIntoHiveTable.scala:127) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.doExecute(InsertIntoHiveTable.scala:276) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:311) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter table. Invalid method name: 'alter_table_with_cascade' at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:500) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:484) at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1668) ... 40 more Caused by: org.apache.thrift.TApplicationException: Invalid method name: 'alter_table_with_cascade' at org.apache.thrift.TApplicationException.read(TApplicationException.java:111) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:71) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_table_with_cascade(ThriftHiveMetastore.java:1374) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_table_with_cascade(ThriftHiveMetastore.java:1358) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:340) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:251) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) at com.sun.proxy.$Proxy28.alter_table(Unknown Source) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:496) ... 42 more java.lang.reflect.InvocationTargetException at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.sql.hive.client.Shim_v0_14.loadTable(HiveShim.scala:442) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply$mcV$sp(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290) at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237) at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236) at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279) at org.apache.spark.sql.hive.client.ClientWrapper.loadTable(ClientWrapper.scala:556) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult$lzycompute(InsertIntoHiveTable.scala:256) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult(InsertIntoHiveTable.scala:127) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.doExecute(InsertIntoHiveTable.scala:276) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:311) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter table. Invalid method name: 'alter_table_with_cascade' at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:500) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:484) at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1668) ... 40 more Caused by: org.apache.thrift.TApplicationException: Invalid method name: 'alter_table_with_cascade' at org.apache.thrift.TApplicationException.read(TApplicationException.java:111) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:71) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_table_with_cascade(ThriftHiveMetastore.java:1374) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_table_with_cascade(ThriftHiveMetastore.java:1358) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:340) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:251) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) at com.sun.proxy.$Proxy28.alter_table(Unknown Source) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:496) ... 42 more Spark 1.6.2 AWS EMR 4.7.2 Hive execution 1.2.1 Metastore: 1.0.0 I had a problem with hive 1.2.1 and default spark-sql on EMR 4.7.2 Since this could be a metastore, execution engine mismatch, I tried the following version changes - but it didn't work. Changes in spark - /usr/lib/spark/conf/spark-defaults.conf spark.sql.hive.metastore.version 1.0.0 spark.sql.hive.metastore.jars maven Got this exception - Builtin jars can only be used when hive execution version == hive metastore version. Execution: 1.2.1 != Metastore: 1.0.0. Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS or change spark.sql.hive.metastore.version to 1.2.1. was (Author: fullstack): Environment: Spark 1.6.2 AWS EMR 4.7.2 Hive execution 1.2.1 Metastore: 1.0.0 Query : insert into table (managed) select a.*, b.* from external_table1 a LEFT OUTER JOIN external_table2 b on a.id=b.id; The statement succeeds and some data is written into this managed table, but it throws the following exception which stops the subsequent statements from getting executed. java.lang.reflect.InvocationTargetException at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.sql.hive.client.Shim_v0_14.loadTable(HiveShim.scala:442) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply$mcV$sp(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290) at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237) at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236) at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279) at org.apache.spark.sql.hive.client.ClientWrapper.loadTable(ClientWrapper.scala:556) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult$lzycompute(InsertIntoHiveTable.scala:256) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult(InsertIntoHiveTable.scala:127) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.doExecute(InsertIntoHiveTable.scala:276) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:311) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter table. Invalid method name: 'alter_table_with_cascade' at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:500) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:484) at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1668) ... 40 more Caused by: org.apache.thrift.TApplicationException: Invalid method name: 'alter_table_with_cascade' at org.apache.thrift.TApplicationException.read(TApplicationException.java:111) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:71) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_table_with_cascade(ThriftHiveMetastore.java:1374) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_table_with_cascade(ThriftHiveMetastore.java:1358) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:340) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:251) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) at com.sun.proxy.$Proxy28.alter_table(Unknown Source) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:496) ... 42 more java.lang.reflect.InvocationTargetException at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.sql.hive.client.Shim_v0_14.loadTable(HiveShim.scala:442) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply$mcV$sp(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:557) at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290) at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237) at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236) at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279) at org.apache.spark.sql.hive.client.ClientWrapper.loadTable(ClientWrapper.scala:556) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult$lzycompute(InsertIntoHiveTable.scala:256) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult(InsertIntoHiveTable.scala:127) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.doExecute(InsertIntoHiveTable.scala:276) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130) at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817) at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:311) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter table. Invalid method name: 'alter_table_with_cascade' at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:500) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:484) at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1668) ... 40 more Caused by: org.apache.thrift.TApplicationException: Invalid method name: 'alter_table_with_cascade' at org.apache.thrift.TApplicationException.read(TApplicationException.java:111) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:71) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_table_with_cascade(ThriftHiveMetastore.java:1374) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_table_with_cascade(ThriftHiveMetastore.java:1358) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:340) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:251) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) at com.sun.proxy.$Proxy28.alter_table(Unknown Source) at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:496) ... 42 more Spark 1.6.2 AWS EMR 4.7.2 Hive execution 1.2.1 Metastore: 1.0.0 I had a problem with hive 1.2.1 and default spark-sql on EMR 4.7.2 Since this could be a metastore, execution engine mismatch, I tried the following version changes - but it didn't work. Changes in spark - /usr/lib/spark/conf/spark-defaults.conf spark.sql.hive.metastore.version 1.0.0 spark.sql.hive.metastore.jars maven Got this exception - Builtin jars can only be used when hive execution version == hive metastore version. Execution: 1.2.1 != Metastore: 1.0.0. Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS or change spark.sql.hive.metastore.version to 1.2.1. > CREATE EXTERNAL TABLE AS SELECT EXPORT AWS S3 ERROR > --------------------------------------------------- > > Key: SPARK-12378 > URL: https://issues.apache.org/jira/browse/SPARK-12378 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.5.2 > Environment: AWS EMR 4.2.0 > Just Master Running m3.xlarge > Applications: > Hive 1.0.0 > Spark 1.5.2 > Reporter: CESAR MICHELETTI > > I am receive the bellow error during try exporting data to AWS S3, in > spark-sql. > Command: > CREATE external TABLE export > ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' > -- lines terminated by '\n' > STORED AS TEXTFILE > LOCATION 's3://xxx/yyy' > AS > SELECT > xxx > .... > (complete query) > ; > Error: > -chgrp: '' does not match expected pattern for group > Usage: hadoop fs [generic options] -chgrp [-R] GROUP PATH... > -chgrp: '' does not match expected pattern for group > Usage: hadoop fs [generic options] -chgrp [-R] GROUP PATH... > 15/12/16 21:09:25 ERROR SparkSQLDriver: Failed in [CREATE external TABLE > csvexport > ... > (create table + query) > ... > java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.sql.hive.client.Shim_v0_14.loadTable(HiveShim.scala:441) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply$mcV$sp(ClientWrapper.scala:489) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:489) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:489) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:256) > at > org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:211) > at > org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:248) > at > org.apache.spark.sql.hive.client.ClientWrapper.loadTable(ClientWrapper.scala:488) > at > org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult$lzycompute(InsertIntoHiveTable.scala:243) > at > org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult(InsertIntoHiveTable.scala:127) > at > org.apache.spark.sql.hive.execution.InsertIntoHiveTable.doExecute(InsertIntoHiveTable.scala:263) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:933) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:933) > at > org.apache.spark.sql.hive.execution.CreateTableAsSelect.run(CreateTableAsSelect.scala:89) > at > org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) > at > org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) > at > org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:69) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:933) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:933) > at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:144) > at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:129) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:725) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter > table. Invalid method name: 'alter_table_with_cascade' > at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:500) > at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:484) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1668) > ... 49 more > Caused by: org.apache.thrift.TApplicationException: Invalid method name: > 'alter_table_with_cascade' > at > org.apache.thrift.TApplicationException.read(TApplicationException.java:111) > at > org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:71) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_table_with_cascade(ThriftHiveMetastore.java:1374) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_table_with_cascade(ThriftHiveMetastore.java:1358) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:340) > at > org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:251) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) > at com.sun.proxy.$Proxy29.alter_table(Unknown Source) > at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:496) > ... 51 more > java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.sql.hive.client.Shim_v0_14.loadTable(HiveShim.scala:441) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply$mcV$sp(ClientWrapper.scala:489) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:489) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$loadTable$1.apply(ClientWrapper.scala:489) > at > org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:256) > at > org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:211) > at > org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:248) > at > org.apache.spark.sql.hive.client.ClientWrapper.loadTable(ClientWrapper.scala:488) > at > org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult$lzycompute(InsertIntoHiveTable.scala:243) > at > org.apache.spark.sql.hive.execution.InsertIntoHiveTable.sideEffectResult(InsertIntoHiveTable.scala:127) > at > org.apache.spark.sql.hive.execution.InsertIntoHiveTable.doExecute(InsertIntoHiveTable.scala:263) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:933) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:933) > at > org.apache.spark.sql.hive.execution.CreateTableAsSelect.run(CreateTableAsSelect.scala:89) > at > org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) > at > org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) > at > org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:69) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:933) > at > org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:933) > at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:144) > at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:129) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:725) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter > table. Invalid method name: 'alter_table_with_cascade' > at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:500) > at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:484) > at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:1668) > ... 49 more > Caused by: org.apache.thrift.TApplicationException: Invalid method name: > 'alter_table_with_cascade' > at > org.apache.thrift.TApplicationException.read(TApplicationException.java:111) > at > org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:71) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_table_with_cascade(ThriftHiveMetastore.java:1374) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_table_with_cascade(ThriftHiveMetastore.java:1358) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:340) > at > org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:251) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) > at com.sun.proxy.$Proxy29.alter_table(Unknown Source) > at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:496) > ... 51 more > Detail: > The file exported is generated in S3 bucket, but the process terminate with > error... -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org