Here is the hive-site.xml <?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration> <!-- Hive Execution Parameters --> <property> <name>hive.metastore.local</name> <value>false</value> <description>controls whether to connect to remote metastore server or open a new metastore server in Hive Client JVM</description> </property> <property> <name>hive.metastore.uris</name> <value>thrift://*****************:2513</value> <description>Remote location of the metastore server</description> </property> <property> <name>hive.metastore.warehouse.dir</name> <value>/projects/hcatalog-warehouse</value> <description>location of default database for the warehouse</description> </property> <property> <name>hive.metastore.sasl.enabled</name> <value>true</value> <description>If true, the metastore thrift interface will be secured with SASL. Clients must authenticate with Kerberos.</description> </property> <property> <name>hive.metastore.kerberos.principal</name> <value>hcat/*****.com@****.COM</value> <description>The service principal for the metastore thrift server. The special string _HOST will be replaced automatically with the correct host name.</description> </property> <property> <name>hive.metastore.client.socket.timeout</name> <value>200</value> <description>MetaStore Client socket timeout in seconds</description> </property> <property> <name>hive.exec.mode.local.auto</name> <value>false</value> <description>Let hive determine whether to run in local mode automatically</description> </property> <property> <name>hive.hadoop.supports.splittable.combineinputformat</name> <value>true</value> <description>Hive internal, should be set to true as MAPREDUCE-1597 is present in Hadoop</description> </property> <property> <name>hive.exec.scratchdir</name> <value>/tmp</value> <description>HDFS Scratch space for Hive jobs</description> </property> <property> <name>hive.querylog.location</name> <value>${user.home}/hivelogs</value> <description>Local Directory where structured hive query logs are created. One file per session is created in this directory. 
If this variable is set to an empty string, the structured log will not be created.</description> </property> <property> <name>mapreduce.job.queuename</name> <value>default</value> <description>Set a default queue name for execution of the Hive queries</description> </property> <property> <name>hadoop.clientside.fs.operations</name> <value>true</value> <description>FS operations related to DDL operations are owned by Hive client</description> </property> <property> <name>hive.exec.compress.output</name> <value>true</value> <description> This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* </description> </property> <property> <name>hive.exec.compress.intermediate</name> <value>true</value> <description> This controls whether intermediate files produced by hive between multiple map-reduce jobs are compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* </description> </property> <property> <name>hive.auto.convert.join</name> <value>false</value> <description> Whether Hive enables the optimization of converting a common join into a mapjoin based on the input file size </description> </property> <property> <name>hive.optimize.partition.prune.metadata</name> <value>true</value> <description>This controls whether metadata optimizations are applied during partition pruning</description> </property> <property> <name>hive.mapred.mode</name> <value>nonstrict</value> <description>The mode in which the hive operations are being performed. In strict mode, some risky queries are not allowed to run</description> </property> <property> <name>io.seqfile.compression.type</name> <value>BLOCK</value> <description>Determines how the compression is performed. 
Can take NONE, RECORD or BLOCK</description> </property> <property> <name>hive.input.format</name> <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value> <description>Determines the input format. Can take org.apache.hadoop.hive.ql.io.HiveInputFormat or org.apache.hadoop.hive.ql.io.CombineHiveInputFormat (default)</description> </property> <property> <name>mapreduce.input.fileinputformat.split.minsize</name> <value>134217728</value> <description>Size of the minimum split for CombineFileInputFormat (128MB recommended)</description> </property> <property> <name>mapreduce.input.fileinputformat.split.maxsize</name> <value>1073741824</value> <description>Size of maximum split for CombineFileInputFormat (1GB recommended)</description> </property> <property> <name>mapreduce.input.fileinputformat.split.minsize.per.rack</name> <value>134217728</value> <description>Size of minimum split size per rack (128MB recommended)</description> </property> <property> <name>mapreduce.input.fileinputformat.split.minsize.per.node</name> <value>134217728</value> <description>Size of minimum split size per node (128MB recommended)</description> </property> <property> <name>hive.exec.orc.default.stripe.size</name> <value>33554432</value> <description>Size in bytes of a stripe in the ORC file (half the block size recommended)</description> </property> <property> <name>hive.exec.orc.default.compress</name> <value>ZLIB</value> <description>The compression method used in an ORC stripe (NONE, SNAPPY, LZO, ZLIB (default))</description> </property> <property> <name>hive.exec.orc.default.row.index.stride</name> <value>10000</value> <description>Size in number of rows of the index stride (10000 recommended)</description> </property> <property> <name>hive.exec.orc.default.buffer.size</name> <value>262144</value> <description>Size in bytes of the ORC buffer (256K recommended)</description> </property> <property> <name>hive.exec.orc.default.block.padding</name> <value>true</value> 
<description>Enable block padding (true recommended)</description> </property> <property> <name>hive.exec.show.job.failure.debug.info</name> <value>false</value> <description>If a job fails, whether to provide a link in the CLI to the task with the most failures, along with debugging hints if applicable.</description> </property> <property> <name>hive.ppd.remove.duplicatefilters</name> <value>true</value> <description>Disable HIVE-1538 as it caused regression HIVE-2791 and HIVE-2344. HIVE-2791 is not available in Hive 0.8 and hence should be disabled</description> </property> <property> <name>hive.security.authorization.enabled</name> <value>false</value> <description>Perform authorization checks on the client</description> </property> <!-- <property> <name>hive.security.authorization.manager</name> <value>org.apache.hcatalog.security.HdfsAuthorizationProvider</value> <description>Authorization provider that performs authorization checks on the client</description> </property> --> <property> <name>hive.metastore.schema.verification</name> <value>false</value> </property> <property> <name>hcatalog.hive.client.cache.expiry.time</name> <value>120</value> <description>Time in seconds after a hive client is created that it should be removed from the client cache</description> </property> <property> <name>hive.limit.query.max.table.partition</name> <value>125000</value> <description>The maximum number of partitions a table can scan</description> </property> <property> <name>mapred.dfsclient.parallelism.max</name> <value>5</value> <description>Max threads to perform getInputSummary during job submission</description> </property> <property> <name>hive.stats.autogather</name> <value>false</value> <description>A flag to gather statistics automatically during the INSERT OVERWRITE command.</description> </property> </configuration> On Tue, Oct 7, 2014 at 6:16 PM, Cheng Lian <lian.cs....@gmail.com> wrote: > So it seems that the classpath issue has been resolved. 
The instantiation > failure should be related to your hive-site.xml. Would you mind to create a > public gist for your hive-site.xml? > > > On 10/8/14 4:34 AM, Li HM wrote: >> >> Thanks Cheng. >> >> Here is the error message after a fresh build. >> >> $ mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.5.0 -Phive -DskipTests >> clean package >> [INFO] >> ------------------------------------------------------------------------ >> [INFO] Reactor Summary: >> [INFO] >> [INFO] Spark Project Parent POM .......................... SUCCESS >> [19.117s] >> [INFO] Spark Project Core ................................ SUCCESS >> [11:24.009s] >> [INFO] Spark Project Bagel ............................... SUCCESS >> [1:09.498s] >> [INFO] Spark Project GraphX .............................. SUCCESS >> [3:41.113s] >> [INFO] Spark Project Streaming ........................... SUCCESS >> [4:25.378s] >> [INFO] Spark Project ML Library .......................... SUCCESS >> [5:43.323s] >> [INFO] Spark Project Tools ............................... SUCCESS >> [44.647s] >> [INFO] Spark Project Catalyst ............................ SUCCESS >> [4:48.658s] >> [INFO] Spark Project SQL ................................. SUCCESS >> [4:56.966s] >> [INFO] Spark Project Hive ................................ SUCCESS >> [3:45.269s] >> [INFO] Spark Project REPL ................................ SUCCESS >> [2:11.617s] >> [INFO] Spark Project YARN Parent POM ..................... SUCCESS >> [6.723s] >> [INFO] Spark Project YARN Stable API ..................... SUCCESS >> [2:20.860s] >> [INFO] Spark Project Hive Thrift Server .................. SUCCESS >> [1:15.231s] >> [INFO] Spark Project Assembly ............................ SUCCESS >> [1:41.245s] >> [INFO] Spark Project External Twitter .................... SUCCESS >> [50.839s] >> [INFO] Spark Project External Kafka ...................... SUCCESS >> [1:15.888s] >> [INFO] Spark Project External Flume Sink ................. 
SUCCESS >> [57.807s] >> [INFO] Spark Project External Flume ...................... SUCCESS >> [1:26.589s] >> [INFO] Spark Project External ZeroMQ ..................... SUCCESS >> [54.361s] >> [INFO] Spark Project External MQTT ....................... SUCCESS >> [53.901s] >> [INFO] Spark Project Examples ............................ SUCCESS >> [2:39.407s] >> [INFO] >> ------------------------------------------------------------------------ >> [INFO] BUILD SUCCESS >> [INFO] >> ------------------------------------------------------------------------ >> >> spark-sql> use mydb; >> FAILED: Execution Error, return code 1 from >> org.apache.hadoop.hive.ql.exec.DDLTask. java.lang.RuntimeException: >> Unable to instantiate >> org.apache.hadoop.hive.metastore.HiveMetaStoreClient >> org.apache.spark.sql.execution.QueryExecutionException: FAILED: >> Execution Error, return code 1 from >> org.apache.hadoop.hive.ql.exec.DDLTask. java.lang.RuntimeException: >> Unable to instantiate >> org.apache.hadoop.hive.metastore.HiveMetaStoreClient >> at org.apache.spark.sql.hive.HiveContext.runHive(HiveContext.scala:302) >> at org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:272) >> at >> org.apache.spark.sql.hive.execution.NativeCommand.sideEffectResult$lzycompute(NativeCommand.scala:35) >> at >> org.apache.spark.sql.hive.execution.NativeCommand.sideEffectResult(NativeCommand.scala:35) >> at >> org.apache.spark.sql.hive.execution.NativeCommand.execute(NativeCommand.scala:38) >> at >> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd$lzycompute(HiveContext.scala:360) >> at >> org.apache.spark.sql.hive.HiveContext$QueryExecution.toRdd(HiveContext.scala:360) >> at org.apache.spark.sql.SchemaRDDLike$class.$init$(SchemaRDDLike.scala:58) >> at org.apache.spark.sql.SchemaRDD.<init>(SchemaRDD.scala:103) >> at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:98) >> at >> org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:58) >> at >> 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:291) >> at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413) >> at >> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226) >> at >> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) >> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) >> at >> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) >> at >> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) >> at java.lang.reflect.Method.invoke(Method.java:601) >> at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328) >> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75) >> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) >> >> On Tue, Oct 7, 2014 at 6:19 AM, Cheng Lian <lian.cs....@gmail.com> wrote: >>> >>> The build command should be correct. What exact error did you encounter >>> when >>> trying Spark 1.1 + Hive 0.12 + Hadoop 2.5.0? >>> >>> > --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org