You can download and compile Spark against your existing Hadoop version.
Here's a quick start:
https://spark.apache.org/docs/latest/cluster-overview.html#cluster-manager-types
You can also read a bit here
http://docs.sigmoidanalytics.com/index.php/Installing_Spark_andSetting_Up_Your_Cluster
(the version is quite old)
Attached is a piece of code (Spark Java API) to connect to HBase.
Thanks
Best Regards
On Thu, Aug 7, 2014 at 1:48 PM, Deepa Jayaveer <[email protected]>
wrote:
> Hi
> I read your white paper about " " . We wanted to do a Proof of Concept on
> Spark with HBase. Documents
> are not much available to set up the spark cluster in Hadoop 2
> environment. If you have any,
> can you please give us some reference URLs
> Also, some sample program to connect to HBase using Spark Java API
>
> Thanks
> Deepa
>
> =====-----=====-----=====
> Notice: The information contained in this e-mail
> message and/or attachments to it may contain
> confidential or privileged information. If you are
> not the intended recipient, any dissemination, use,
> review, distribution, printing or copying of the
> information contained in this e-mail message
> and/or attachments to it are strictly prohibited. If
> you have received this communication in error,
> please notify us by reply e-mail or telephone and
> immediately and permanently delete the message
> and any attachments. Thank you
>
>
import java.util.Iterator;
import java.util.List;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.NewHadoopRDD;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import com.google.common.collect.Lists;
import scala.Function1;
import scala.Tuple2;
import scala.collection.JavaConversions;
import scala.collection.Seq;
import scala.collection.JavaConverters.*;
import scala.reflect.ClassTag;
public class SparkHBaseMain {
@SuppressWarnings("deprecation")
public static void main(String[] arg){
try{
List<String> jars = Lists.newArrayList("/home/akhld/Desktop/tools/spark-9/jars/spark-assembly-0.9.0-incubating-hadoop2.3.0-mr1-cdh5.0.0.jar",
"/home/akhld/Downloads/sparkhbasecode/hbase-server-0.96.0-hadoop2.jar",
"/home/akhld/Downloads/sparkhbasecode/hbase-protocol-0.96.0-hadoop2.jar",
"/home/akhld/Downloads/sparkhbasecode/hbase-hadoop2-compat-0.96.0-hadoop2.jar",
"/home/akhld/Downloads/sparkhbasecode/hbase-common-0.96.0-hadoop2.jar",
"/home/akhld/Downloads/sparkhbasecode/hbase-client-0.96.0-hadoop2.jar",
"/home/akhld/Downloads/sparkhbasecode/htrace-core-2.02.jar");
SparkConf spconf = new SparkConf();
spconf.setMaster("local");
spconf.setAppName("SparkHBase");
spconf.setSparkHome("/home/akhld/Desktop/tools/spark-9");
spconf.setJars(jars.toArray(new String[jars.size()]));
spconf.set("spark.executor.memory", "1g");
final JavaSparkContext sc = new JavaSparkContext(spconf);
org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
conf.addResource("/home/akhld/Downloads/sparkhbasecode/hbase-site.xml");
conf.set(TableInputFormat.INPUT_TABLE, "blogposts");
NewHadoopRDD<ImmutableBytesWritable, Result> rdd = new NewHadoopRDD<ImmutableBytesWritable, Result>(JavaSparkContext.toSparkContext(sc), TableInputFormat.class, ImmutableBytesWritable.class, Result.class, conf);
JavaRDD<Tuple2<ImmutableBytesWritable, Result>> jrdd = rdd.toJavaRDD();
ForEachFunction f = new ForEachFunction();
JavaRDD<Iterator<String>> retrdd = jrdd.map(f);
System.out.println("Count =>" + retrdd.count());
}catch(Exception e){
e.printStackTrace();
System.out.println("Craaaashed : " + e);
}
}
@SuppressWarnings("serial")
private static class ForEachFunction extends Function<Tuple2<ImmutableBytesWritable, Result>, Iterator<String>>{
public Iterator<String> call(Tuple2<ImmutableBytesWritable, Result> test) {
Result tmp = (Result) test._2;
List<KeyValue> kvl = tmp.getColumn("post".getBytes(), "title".getBytes());
for(KeyValue kl:kvl){
String sb = new String(kl.getValue());
System.out.println("Value :" + sb);
}
return null;
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]