I'm sharing all the Config files and code here, Please have a look Following is the code i'm using
System.setProperty("java.security.krb5.conf", "/etc/krb5.conf"); System.setProperty("java.security.auth.login.config", "/etc/hbase/conf/zk-jaas.conf"); val hconf = HBaseConfiguration.create() val tableName = "emp" hconf.set("hbase.zookeeper.quorum", "hadoop-master") hconf.set(TableInputFormat.INPUT_TABLE, tableName) hconf.set("hbase.zookeeper.property.clientPort", "2181") hconf.set("hbase.master", "hadoop-master:60000") hconf.set("hadoop.security.authentication", "kerberos") hconf.set("hbase.security.authentication", "kerberos") hconf.addResource(new Path("/etc/hbase/conf/core-site.xml")) hconf.addResource(new Path("/etc/hbase/conf/hbase-site.xml")) UserGroupInformation.setConfiguration(hconf) UserGroupInformation.loginUserFromKeytab("spark@platalyticsrealm", "/etc/hadoop/conf/sp.keytab") val conf = new SparkConf() conf.set("spark.yarn.security.tokens.habse.enabled", "true") conf.set("hadoop.security.authentication", "true") conf.set("hbase.security.authentication", "true") val sc = new SparkContext(conf) println("***********************************" * 6) println(sc.getConf.get("spark.yarn.security.tokens.habse.enabled")) println(UserGroupInformation.isSecurityEnabled) println("***********************************" * 6) val hBaseRDD = sc.newAPIHadoopRDD(hconf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result]) //get the row count val count = hBaseRDD.count() print("HBase RDD count:" + count) Here are the contents of *spark-defaults.conf* file spark.master spark://hadoop-master:7077 spark.eventLog.enabled true spark.eventLog.dir hdfs://hadoop-master:8020/spark/logs spark.serializer org.apache.spark.serializer.KryoSerializer spark.yarn.access.namenodes hdfs://hadoop-master:8020/ spark.yarn.security.tokens.hbase.enabled true spark.yarn.security.tokens.hive.enabled true spark.yarn.principal yarn/hadoop-master@platalyticsrealm spark.yarn.keytab /etc/hadoop/conf/yarn.keytab spark.driver.extraClassPath /root/hbase-1.2.2/lib/hbase-protocol-1.2.2.jar:/root/hbase-1.2.2/lib/hbase-server-1.2.2.jar:/root/hbase-1.2.2/lib/hbase-client-1.2.2.jar:/root/hbase-1.2.2/lib/hbase-common-1.2.2.jar:/root/hbase-1.2.2/lib/htrace-core-3.1.0-incubating.jar:protobuf-java-2.5.0.jar:/root/hbase-1.2.2/lib/hbase-annotations-1.2.2.jar:/root/hbase-1.2.2/lib/zookeeper-3.4.6.jar:/root/hbase-1.2.2/lib/metrics-core-2.2.0.jar spark.executor.extraClassPath /root/hbase-1.2.2/lib/hbase-protocol-1.2.2.jar:/root/hbase-1.2.2/lib/hbase-server-1.2.2.jar:/root/hbase-1.2.2/lib/hbase-client-1.2.2.jar:/root/hbase-1.2.2/lib/hbase-common-1.2.2.jar:/root/hbase-1.2.2/lib/htrace-core-3.1.0-incubating.jar:protobuf-java-2.5.0.jar:/root/hbase-1.2.2/lib/hbase-annotations-1.2.2.jar:/root/hbase-1.2.2/lib/zookeeper-3.4.6.jar:/root/hbase-1.2.2/lib/metrics-core-2.2.0.jar Here are the contents of *spark-env.sh* file SPARK_MASTER_WEBUI_PORT=8181 SPARK_WORKER_WEBUI_PORT=8082 export SPARK_HOME=/usr/local/spark-2 export DEFAULT_HADOOP_HOME=/usr/local/hadoop export SPARK_JAR_HDFS_PATH=${SPARK_JAR_HDFS_PATH:-/usr/local/spark-2/assembly/target/scala-2.11/jars/spark-assembly_2.11-2.0.1-SNAPSHOT.jar} #echo $SPARK_JAR_HDFS_PATH export SPARK_LAUNCH_WITH_SCALA=0 export SPARK_LIBRARY_PATH=${SPARK_JAR_HDFS_PATH} export SCALA_LIBRARY_PATH=${SPARK_JAR_HDFS_PATH} export HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP_HOME} #echo $HADOOP_HOME if [ -n "$HADOOP_HOME" ]; then export LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native fi #echo $LD_LIBRARY_PATH export SPARK_MASTER_OPTS="-Dspark.deploy.defaultCores=2 -Dspark.worker.cleanup.enabled=true -Dspark.worker.cleanup.appDataTtl=86400" export YARN_CONF_DIR=/usr/local/hadoop/conf export HBASE_CONF_DIR=/root/hbase-1.2.2/conf export HADOOP_CONF_DIR=/usr/local/hadoop/conf #echo $HADOOP_CONF_DIR HOSTNAME=`hostname -f` #echo $HOSTNAME export SPARK_HISTORY_OPTS="-Dspark.history.kerberos.enabled=true -Dspark.history.kerberos.principal=spark/${HOSTNAME}@platalyticsrealm -Dspark.history.kerberos.keytab=/etc/hadoop/conf/spark.keytab" On Thu, Aug 11, 2016 at 1:13 AM, Aneela Saleem <ane...@platalytics.com> wrote: > Hi Subroto, > > I checked this. When i set the property in spark-defaults.conf file and > log its value from SparkConf, it says "No Such Element Found". But when i > set it through SparkConf explicitly, the previous issue is not resolved. > > I'm trying hard to get it done but no workaround found yet! > > Thanks > > On Wed, Aug 10, 2016 at 4:54 PM, Subroto Sanyal <ssan...@datameer.com> > wrote: > >> Not sure what could be the problem be but, I would suggest you to double >> check if the said property is part of SparkConf obejct being created in >> the >> code (just by logging it). >> >> Cheers, >> Subroto Sanyal >> >> On Wed, Aug 10, 2016 at 1:39 PM, Aneela Saleem <ane...@platalytics.com> >> wrote: >> >> > The property was already set in spark-default.conf file but still facing >> > same error. >> > >> > On Wed, Aug 10, 2016 at 4:35 PM, Subroto Sanyal <ssan...@datameer.com> >> > wrote: >> > >> > > yes... you can set the property in the conf file or you can the >> property >> > > explicitly in the Spark Configuration object used while creation of >> > > SparkContext/JavaSparkContext. >> > > >> > > Cheers, >> > > Subroto Sanyal >> > > >> > > On Wed, Aug 10, 2016 at 12:09 PM, Aneela Saleem < >> ane...@platalytics.com> >> > > wrote: >> > > >> > > > Thanks Subroto, >> > > > >> > > > Do i need to set it to 'true' in spar-default.conf file? >> > > > >> > > > On Wed, Aug 10, 2016 at 2:59 PM, Subroto Sanyal < >> ssan...@datameer.com> >> > > > wrote: >> > > > >> > > > > hi Aneela >> > > > > >> > > > > By any chance you are missing the property: >> > > > > spark.yarn.security.tokens.habse.enabled >> > > > > This was introduced as part of the fix: >> > > > > https://github.com/apache/spark/pull/8134/files >> > > > > >> > > > > Cheers, >> > > > > Subroto Sanyal >> > > > > >> > > > > On Wed, Aug 10, 2016 at 11:53 AM, Aneela Saleem < >> > > ane...@platalytics.com> >> > > > > wrote: >> > > > > >> > > > > > And I'm using Apache distribution of Spark not Cloudera. >> > > > > > >> > > > > > On Wed, Aug 10, 2016 at 12:06 PM, Aneela Saleem < >> > > > ane...@platalytics.com> >> > > > > > wrote: >> > > > > > >> > > > > > > Thanks Nkechi, >> > > > > > > >> > > > > > > I added this dependency as an external jar, when i compile the >> > > code, >> > > > > > > unfortunately i got the following error: >> > > > > > > >> > > > > > > error: object cloudera is not a member of package com >> > > > > > > [ERROR] import com.cloudera.spark.hbase.HBaseContext >> > > > > > > >> > > > > > > >> > > > > > > >> > > > > > > On Tue, Aug 9, 2016 at 7:51 PM, Nkechi Achara < >> > > > nkach...@googlemail.com >> > > > > > >> > > > > > > wrote: >> > > > > > > >> > > > > > >> hi, >> > > > > > >> >> > > > > > >> Due to the fact we are not on Hbase 2.00 we are using >> > > SparkOnHbase. >> > > > > > >> >> > > > > > >> Dependency: >> > > > > > >> <dependency> >> > > > > > >> <groupId>com.cloudera</groupId> >> > > > > > >> <artifactId>spark-hbase</artifactId> >> > > > > > >> <version>0.0.2-clabs</version> >> > > > > > >> </dependency> >> > > > > > >> >> > > > > > >> It is quite a small snippet of code. For a general scan >> using a >> > > > start >> > > > > > and >> > > > > > >> stop time as the scan time range. >> > > > > > >> >> > > > > > >> val conf = new SparkConf(). >> > > > > > >> set("spark.shuffle.consolidateFiles", "true"). >> > > > > > >> set("spark.kryo.registrationRequired", "false"). >> > > > > > >> set("spark.serializer", "org.apache.spark.serializer.K >> > > > > > >> ryoSerializer"). >> > > > > > >> set("spark.kryoserializer.buffer", "30m"). >> > > > > > >> set("spark.shuffle.spill", "true"). >> > > > > > >> set("spark.shuffle.memoryFraction", "0.4") >> > > > > > >> >> > > > > > >> val sc = new SparkContext(conf) >> > > > > > >> >> > > > > > >> val scan = new Scan() >> > > > > > >> scan.addColumn(columnName, "column1") >> > > > > > >> scan.setTimeRange(scanRowStartTs, scanRowStopTs) >> > > > > > >> hc.hbaseRDD(inputTableName,scan,filter) >> > > > > > >> >> > > > > > >> To run just use the following: >> > > > > > >> >> > > > > > >> spark-submit --class ClassName --master yarn-client >> > > --driver-memory >> > > > > > >> 2000M --executor-memory 5G --keytab <location of keytab> >> > > --principal >> > > > > > >> <location of principal> >> > > > > > >> >> > > > > > >> That should work in a general way. Obviously you can utilise >> > other >> > > > > scan >> > > > > > / >> > > > > > >> put / gets etc methods. >> > > > > > >> >> > > > > > >> Thanks, >> > > > > > >> >> > > > > > >> Nkechi >> > > > > > >> >> > > > > > >> On 9 August 2016 at 15:20, Aneela Saleem < >> > ane...@platalytics.com> >> > > > > > wrote: >> > > > > > >> >> > > > > > >> > Thanks Nkechi, >> > > > > > >> > >> > > > > > >> > Can you please direct me to some code snippet with hbase on >> > > spark >> > > > > > >> module? >> > > > > > >> > I've been trying that for last few days but did not found a >> > > > > > workaround. >> > > > > > >> > >> > > > > > >> > >> > > > > > >> > >> > > > > > >> > On Tue, Aug 9, 2016 at 6:13 PM, Nkechi Achara < >> > > > > > nkach...@googlemail.com> >> > > > > > >> > wrote: >> > > > > > >> > >> > > > > > >> > > Hey, >> > > > > > >> > > >> > > > > > >> > > Have you tried hbase on spark module, or the spark-hbase >> > > module >> > > > to >> > > > > > >> > connect? >> > > > > > >> > > The principal and keytab options should work out of the >> box >> > > for >> > > > > > >> > kerberized >> > > > > > >> > > access. I can attempt your code if you don't have the >> > ability >> > > to >> > > > > use >> > > > > > >> > those >> > > > > > >> > > modules. >> > > > > > >> > > >> > > > > > >> > > Thanks >> > > > > > >> > > K >> > > > > > >> > > >> > > > > > >> > > On 9 Aug 2016 2:25 p.m., "Aneela Saleem" < >> > > > ane...@platalytics.com> >> > > > > > >> wrote: >> > > > > > >> > > >> > > > > > >> > > > Hi all, >> > > > > > >> > > > >> > > > > > >> > > > I'm trying to connect to Hbase with security enabled >> using >> > > > spark >> > > > > > >> job. I >> > > > > > >> > > > have kinit'd from command line. When i run the >> following >> > job >> > > > > i.e., >> > > > > > >> > > > >> > > > > > >> > > > /usr/local/spark-2/bin/spark-submit --keytab >> > > > > > >> > > /etc/hadoop/conf/spark.keytab >> > > > > > >> > > > --principal spark/hadoop-master@platalyticsrealm >> --class >> > > > > > >> > > > com.platalytics.example.spark.App --master yarn >> > > > > > >> --driver-class-path >> > > > > > >> > > > /root/hbase-1.2.2/conf /home/vm6/project-1-jar-with-d >> > > > > > >> ependencies.jar >> > > > > > >> > > > >> > > > > > >> > > > I get the error: >> > > > > > >> > > > >> > > > > > >> > > > 2016-08-07 20:43:57,617 WARN >> > > > > > >> > > > [hconnection-0x24b5fa45-metaLookup-shared--pool2-t1] >> > > > > > >> > > > ipc.RpcClientImpl: Exception encountered while >> connecting >> > to >> > > > the >> > > > > > >> > server : >> > > > > > >> > > > javax.security.sasl.SaslException: GSS initiate failed >> > > > [Caused >> > > > > by >> > > > > > >> > > > GSSException: No valid credentials provided (Mechanism >> > > level: >> > > > > > >> Failed to >> > > > > > >> > > > find any Kerberos tgt)] 2016-08-07 20:43:57,619 ERROR >> > > > > > >> > > > [hconnection-0x24b5fa45-metaLookup-shared--pool2-t1] >> > > > > > >> > ipc.RpcClientImpl: >> > > > > > >> > > > SASL authentication failed. The most likely cause is >> > missing >> > > > or >> > > > > > >> invalid >> > > > > > >> > > > credentials. Consider 'kinit'. javax.security.sasl. >> > > > > SaslException: >> > > > > > >> GSS >> > > > > > >> > > > initiate failed [Caused by GSSException: No valid >> > > credentials >> > > > > > >> provided >> > > > > > >> > > > (Mechanism level: Failed to find any Kerberos tgt)] at >> > > > > > >> > > > com.sun.security.sasl.gsskerb.GssKrb5Client. >> > > > evaluateChallenge( >> > > > > > >> > > > GssKrb5Client.java:212) >> > > > > > >> > > > at org.apache.hadoop.hbase.securi >> ty.HBaseSaslRpcClient. >> > > > > > saslConnect( >> > > > > > >> > > > HBaseSaslRpcClient.java:179) >> > > > > > >> > > > at org.apache.hadoop.hbase.ipc.Rp >> cClientImpl$Connection. >> > > > > > >> > > > setupSaslConnection(RpcClientImpl.java:617) >> > > > > > >> > > > at org.apache.hadoop.hbase.ipc.Rp >> cClientImpl$Connection. >> > > > > > >> > > > access$700(RpcClientImpl.java:162) at >> > > > > > org.apache.hadoop.hbase.ipc. >> > > > > > >> > > > RpcClientImpl$Connection$2.run(RpcClientImpl.java:743) >> > > > > > >> > > > >> > > > > > >> > > > Following is my code: >> > > > > > >> > > > >> > > > > > >> > > > System.setProperty("java.security.krb5.conf", >> > > > > "/etc/krb5.conf"); >> > > > > > >> > > > System.setProperty("java.security.auth.login.config", >> > > > > > >> > > > "/etc/hbase/conf/zk-jaas.conf"); >> > > > > > >> > > > >> > > > > > >> > > > val hconf = HBaseConfiguration.create() >> > > > > > >> > > > val tableName = "emp" >> > > > > > >> > > > hconf.set("hbase.zookeeper.quorum", "hadoop-master") >> > > > > > >> > > > hconf.set(TableInputFormat.INPUT_TABLE, tableName) >> > > > > > >> > > > hconf.set("hbase.zookeeper.property.clientPort", >> > "2181") >> > > > > > >> > > > hconf.set("hadoop.security.authentication", >> "kerberos") >> > > > > > >> > > > hconf.set("hbase.security.authentication", >> "kerberos") >> > > > > > >> > > > hconf.addResource(new Path("/etc/hbase/conf/core- >> > > > site.xml")) >> > > > > > >> > > > hconf.addResource(new Path("/etc/hbase/conf/hbase- >> > > > site.xml")) >> > > > > > >> > > > UserGroupInformation.setConfiguration(hconf) >> > > > > > >> > > > val keyTab = "/etc/hadoop/conf/spark.keytab" >> > > > > > >> > > > val ugi = UserGroupInformation. >> > > > loginUserFromKeytabAndReturnUG >> > > > > > >> > > > I("spark/hadoop-master@platalyticsrealm", keyTab) >> > > > > > >> > > > UserGroupInformation.setLoginUser(ugi) >> > > > > > >> > > > ugi.doAs(new PrivilegedExceptionAction[Void]() { >> > > > > > >> > > > override def run(): Void = { >> > > > > > >> > > > val conf = new SparkConf >> > > > > > >> > > > val sc = new SparkContext(conf) >> > > > > > >> > > > sc.addFile(keyTab) >> > > > > > >> > > > var hBaseRDD = sc.newAPIHadoopRDD(hconf, >> > > > > > >> classOf[TableInputFormat], >> > > > > > >> > > > classOf[org.apache.hadoop.hbase.io. >> > > > > ImmutableBytesWritable], >> > > > > > >> > > > classOf[org.apache.hadoop.hbase.client.Result]) >> > > > > > >> > > > println("Number of Records found : " + >> > hBaseRDD.count()) >> > > > > > >> > > > hBaseRDD.foreach(x => { >> > > > > > >> > > > println(new String(x._2.getRow())) >> > > > > > >> > > > }) >> > > > > > >> > > > sc.stop() >> > > > > > >> > > > return null >> > > > > > >> > > > } >> > > > > > >> > > > }) >> > > > > > >> > > > >> > > > > > >> > > > Please have a look. And help me try finding the issue. >> > > > > > >> > > > >> > > > > > >> > > > Thanks >> > > > > > >> > > > >> > > > > > >> > > >> > > > > > >> > >> > > > > > >> >> > > > > > > >> > > > > > > >> > > > > > >> > > > > >> > > > >> > > >> > >> > >