http://git-wip-us.apache.org/repos/asf/spark-website/blob/a6d9cbde/site/docs/2.1.2/api/java/org/apache/spark/api/java/JavaSparkContext.html ---------------------------------------------------------------------- diff --git a/site/docs/2.1.2/api/java/org/apache/spark/api/java/JavaSparkContext.html b/site/docs/2.1.2/api/java/org/apache/spark/api/java/JavaSparkContext.html new file mode 100644 index 0000000..6927b66 --- /dev/null +++ b/site/docs/2.1.2/api/java/org/apache/spark/api/java/JavaSparkContext.html @@ -0,0 +1,2088 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<!-- NewPage --> +<html lang="en"> +<head> +<!-- Generated by javadoc (1.7.0_151) on Mon Oct 02 14:47:27 PDT 2017 --> +<title>JavaSparkContext (Spark 2.1.2 JavaDoc)</title> +<meta name="date" content="2017-10-02"> +<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style"> +</head> +<body> +<script type="text/javascript"><!-- + try { + if (location.href.indexOf('is-external=true') == -1) { + parent.document.title="JavaSparkContext (Spark 2.1.2 JavaDoc)"; + } + } + catch(err) { + } +//--> +</script> +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<!-- ========= START OF TOP NAVBAR ======= --> +<div class="topNav"><a name="navbar_top"> +<!-- --> +</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow"> +<!-- --> +</a> +<ul class="navList" title="Navigation"> +<li><a href="../../../../../overview-summary.html">Overview</a></li> +<li><a href="package-summary.html">Package</a></li> +<li class="navBarCell1Rev">Class</li> +<li><a href="package-tree.html">Tree</a></li> +<li><a href="../../../../../deprecated-list.html">Deprecated</a></li> +<li><a href="../../../../../index-all.html">Index</a></li> +<li><a href="../../../../../help-doc.html">Help</a></li> +</ul> +</div> +<div class="subNav"> +<ul class="navList"> +<li><a 
href="../../../../../org/apache/spark/api/java/JavaRDDLike.html" title="interface in org.apache.spark.api.java"><span class="strong">Prev Class</span></a></li> +<li><a href="../../../../../org/apache/spark/api/java/JavaSparkStatusTracker.html" title="class in org.apache.spark.api.java"><span class="strong">Next Class</span></a></li> +</ul> +<ul class="navList"> +<li><a href="../../../../../index.html?org/apache/spark/api/java/JavaSparkContext.html" target="_top">Frames</a></li> +<li><a href="JavaSparkContext.html" target="_top">No Frames</a></li> +</ul> +<ul class="navList" id="allclasses_navbar_top"> +<li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> +</ul> +<div> +<script type="text/javascript"><!-- + allClassesLink = document.getElementById("allclasses_navbar_top"); + if(window==top) { + allClassesLink.style.display = "block"; + } + else { + allClassesLink.style.display = "none"; + } + //--> +</script> +</div> +<div> +<ul class="subNavList"> +<li>Summary: </li> +<li>Nested | </li> +<li>Field | </li> +<li><a href="#constructor_summary">Constr</a> | </li> +<li><a href="#method_summary">Method</a></li> +</ul> +<ul class="subNavList"> +<li>Detail: </li> +<li>Field | </li> +<li><a href="#constructor_detail">Constr</a> | </li> +<li><a href="#method_detail">Method</a></li> +</ul> +</div> +<a name="skip-navbar_top"> +<!-- --> +</a></div> +<!-- ========= END OF TOP NAVBAR ========= --> +<!-- ======== START OF CLASS DATA ======== --> +<div class="header"> +<div class="subTitle">org.apache.spark.api.java</div> +<h2 title="Class JavaSparkContext" class="title">Class JavaSparkContext</h2> +</div> +<div class="contentContainer"> +<ul class="inheritance"> +<li>Object</li> +<li> +<ul class="inheritance"> +<li>org.apache.spark.api.java.JavaSparkContext</li> +</ul> +</li> +</ul> +<div class="description"> +<ul class="blockList"> +<li class="blockList"> +<dl> +<dt>All Implemented Interfaces:</dt> +<dd>java.io.Closeable, AutoCloseable</dd> +</dl> +<hr> +<br> 
+<pre>public class <span class="strong">JavaSparkContext</span> +extends Object +implements java.io.Closeable</pre> +<div class="block">A Java-friendly version of <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark"><code>SparkContext</code></a> that returns + <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java"><code>JavaRDD</code></a>s and works with Java collections instead of Scala ones. + <p> + Only one SparkContext may be active per JVM. You must <code>stop()</code> the active SparkContext before + creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details.</div> +</li> +</ul> +</div> +<div class="summary"> +<ul class="blockList"> +<li class="blockList"> +<!-- ======== CONSTRUCTOR SUMMARY ======== --> +<ul class="blockList"> +<li class="blockList"><a name="constructor_summary"> +<!-- --> +</a> +<h3>Constructor Summary</h3> +<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> +<caption><span>Constructors</span><span class="tabEnd"> </span></caption> +<tr> +<th class="colOne" scope="col">Constructor and Description</th> +</tr> +<tr class="altColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext()">JavaSparkContext</a></strong>()</code> +<div class="block">Create a JavaSparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit).</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(org.apache.spark.SparkConf)">JavaSparkContext</a></strong>(<a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</code> </td> +</tr> +<tr class="altColor"> +<td 
class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(org.apache.spark.SparkContext)">JavaSparkContext</a></strong>(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(java.lang.String,%20java.lang.String)">JavaSparkContext</a></strong>(String master, + String appName)</code> </td> +</tr> +<tr class="altColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(java.lang.String,%20java.lang.String,%20org.apache.spark.SparkConf)">JavaSparkContext</a></strong>(String master, + String appName, + <a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)">JavaSparkContext</a></strong>(String master, + String appName, + String sparkHome, + String jarFile)</code> </td> +</tr> +<tr class="altColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String[])">JavaSparkContext</a></strong>(String master, + String appName, + String sparkHome, + String[] jars)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String[],%20java.util.Map)">JavaSparkContext</a></strong>(String master, + String appName, + 
String sparkHome, + String[] jars, + java.util.Map<String,String> environment)</code> </td> +</tr> +</table> +</li> +</ul> +<!-- ========== METHOD SUMMARY =========== --> +<ul class="blockList"> +<li class="blockList"><a name="method_summary"> +<!-- --> +</a> +<h3>Method Summary</h3> +<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> +<caption><span>Methods</span><span class="tabEnd"> </span></caption> +<tr> +<th class="colFirst" scope="col">Modifier and Type</th> +<th class="colLast" scope="col">Method and Description</th> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T,R> <a href="../../../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><T,R></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulable(T,%20org.apache.spark.AccumulableParam)">accumulable</a></strong>(T initialValue, + <a href="../../../../../org/apache/spark/AccumulableParam.html" title="interface in org.apache.spark">AccumulableParam</a><T,R> param)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T,R> <a href="../../../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><T,R></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulable(T,%20java.lang.String,%20org.apache.spark.AccumulableParam)">accumulable</a></strong>(T initialValue, + String name, + <a href="../../../../../org/apache/spark/AccumulableParam.html" title="interface in org.apache.spark">AccumulableParam</a><T,R> param)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. 
Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator(double)">accumulator</a></strong>(double initialValue)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().doubleAccumulator(). Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator(double,%20java.lang.String)">accumulator</a></strong>(double initialValue, + String name)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().doubleAccumulator(String). Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator(int)">accumulator</a></strong>(int initialValue)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().longAccumulator(). 
Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator(int,%20java.lang.String)">accumulator</a></strong>(int initialValue, + String name)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().longAccumulator(String). Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator(T,%20org.apache.spark.AccumulatorParam)">accumulator</a></strong>(T initialValue, + <a href="../../../../../org/apache/spark/AccumulatorParam.html" title="interface in org.apache.spark">AccumulatorParam</a><T> accumulatorParam)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator(T,%20java.lang.String,%20org.apache.spark.AccumulatorParam)">accumulator</a></strong>(T initialValue, + String name, + <a href="../../../../../org/apache/spark/AccumulatorParam.html" title="interface in org.apache.spark">AccumulatorParam</a><T> accumulatorParam)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. 
Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#addFile(java.lang.String)">addFile</a></strong>(String path)</code> +<div class="block">Add a file to be downloaded with this Spark job on every node.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#addFile(java.lang.String,%20boolean)">addFile</a></strong>(String path, + boolean recursive)</code> +<div class="block">Add a file to be downloaded with this Spark job on every node.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#addJar(java.lang.String)">addJar</a></strong>(String path)</code> +<div class="block">Adds a JAR dependency for all tasks to be executed on this SparkContext in the future.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#appName()">appName</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#binaryFiles(java.lang.String)">binaryFiles</a></strong>(String path)</code> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or 
any Hadoop-supported file system URI as a byte array.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#binaryFiles(java.lang.String,%20int)">binaryFiles</a></strong>(String path, + int minPartitions)</code> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><byte[]></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#binaryRecords(java.lang.String,%20int)">binaryRecords</a></strong>(String path, + int recordLength)</code> +<div class="block">Load data from a flat binary file, assuming the length of each record is constant.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#broadcast(T)">broadcast</a></strong>(T value)</code> +<div class="block">Broadcast a read-only variable to the cluster, returning a + <a href="../../../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions.</div> +</td> +</tr> +<tr class="altColor"> 
+<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#cancelAllJobs()">cancelAllJobs</a></strong>()</code> +<div class="block">Cancel all jobs that have been scheduled or are running.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#cancelJobGroup(java.lang.String)">cancelJobGroup</a></strong>(String groupId)</code> +<div class="block">Cancel active jobs for the specified group.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#clearCallSite()">clearCallSite</a></strong>()</code> +<div class="block">Pass-through to SparkContext.clearCallSite.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#clearJobGroup()">clearJobGroup</a></strong>()</code> +<div class="block">Clear the current thread's job group ID and its description.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#close()">close</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>Integer</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#defaultMinPartitions()">defaultMinPartitions</a></strong>()</code> +<div class="block">Default min number of partitions for Hadoop RDDs when not given by user</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>Integer</code></td> +<td class="colLast"><code><strong><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#defaultParallelism()">defaultParallelism</a></strong>()</code> +<div class="block">Default level of parallelism to use when not given by user (e.g.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#doubleAccumulator(double)">doubleAccumulator</a></strong>(double initialValue)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().doubleAccumulator(). Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#doubleAccumulator(double,%20java.lang.String)">doubleAccumulator</a></strong>(double initialValue, + String name)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().doubleAccumulator(String). 
Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#emptyRDD()">emptyRDD</a></strong>()</code> +<div class="block">Get an RDD that has no partitions or elements.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>static <a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#fromSparkContext(org.apache.spark.SparkContext)">fromSparkContext</a></strong>(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/Optional.html" title="class in org.apache.spark.api.java">Optional</a><String></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getCheckpointDir()">getCheckpointDir</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getConf()">getConf</a></strong>()</code> +<div class="block">Return a copy of this JavaSparkContext's configuration.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getLocalProperty(java.lang.String)">getLocalProperty</a></strong>(String key)</code> +<div class="block">Get a local property set in this thread, or null if it is missing.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>java.util.Map<Integer,<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><?>></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getPersistentRDDs()">getPersistentRDDs</a></strong>()</code> +<div class="block">Returns a Java map of JavaRDDs that have marked themselves as persistent via cache() call.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/Optional.html" title="class in org.apache.spark.api.java">Optional</a><String></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getSparkHome()">getSparkHome</a></strong>()</code> +<div class="block">Get Spark's home location from either a value set through the constructor, + or the spark.home Java property, or the SPARK_HOME environment variable + (in that order of preference).</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>org.apache.hadoop.conf.Configuration</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopConfiguration()">hadoopConfiguration</a></strong>()</code> +<div class="block">Returns the Hadoop configuration used for the Hadoop code (e.g.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td 
class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class)">hadoopFile</a></strong>(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class,%20int)">hadoopFile</a></strong>(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopRDD(org.apache.hadoop.mapred.JobConf,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class)">hadoopRDD</a></strong>(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <br><a 
href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopRDD(org.apache.hadoop.mapred.JobConf,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class,%20int)">hadoopRDD</a></strong>(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#intAccumulator(int)">intAccumulator</a></strong>(int initialValue)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().longAccumulator(). Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#intAccumulator(int,%20java.lang.String)">intAccumulator</a></strong>(int initialValue, + String name)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use sc().longAccumulator(String). 
Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>Boolean</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#isLocal()">isLocal</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>static String[]</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#jarOfClass(java.lang.Class)">jarOfClass</a></strong>(Class<?> cls)</code> +<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass + their JARs to SparkContext.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>static String[]</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#jarOfObject(java.lang.Object)">jarOfObject</a></strong>(Object obj)</code> +<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users + to pass their JARs to SparkContext.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>java.util.List<String></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#jars()">jars</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#master()">master</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#newAPIHadoopFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class,%20org.apache.hadoop.conf.Configuration)">newAPIHadoopFile</a></strong>(String path, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass, + org.apache.hadoop.conf.Configuration conf)</code> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#newAPIHadoopRDD(org.apache.hadoop.conf.Configuration,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class)">newAPIHadoopRDD</a></strong>(org.apache.hadoop.conf.Configuration conf, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass)</code> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#objectFile(java.lang.String)">objectFile</a></strong>(String path)</code> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a 
href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#objectFile(java.lang.String,%20int)">objectFile</a></strong>(String path, + int minPartitions)</code> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelize(java.util.List)">parallelize</a></strong>(java.util.List<T> list)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelize(java.util.List,%20int)">parallelize</a></strong>(java.util.List<T> list, + int numSlices)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizeDoubles(java.util.List)">parallelizeDoubles</a></strong>(java.util.List<Double> list)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> 
+</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizeDoubles(java.util.List,%20int)">parallelizeDoubles</a></strong>(java.util.List<Double> list, + int numSlices)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizePairs(java.util.List)">parallelizePairs</a></strong>(java.util.List<scala.Tuple2<K,V>> list)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizePairs(java.util.List,%20int)">parallelizePairs</a></strong>(java.util.List<scala.Tuple2<K,V>> list, + int numSlices)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sc()">sc</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V> <a 
href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sequenceFile(java.lang.String,%20java.lang.Class,%20java.lang.Class)">sequenceFile</a></strong>(String path, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop SequenceFile.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sequenceFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20int)">sequenceFile</a></strong>(String path, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setCallSite(java.lang.String)">setCallSite</a></strong>(String site)</code> +<div class="block">Pass-through to SparkContext.setCallSite.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setCheckpointDir(java.lang.String)">setCheckpointDir</a></strong>(String dir)</code> +<div class="block">Set the directory under which RDDs are going to be checkpointed.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setJobGroup(java.lang.String,%20java.lang.String)">setJobGroup</a></strong>(String groupId, + String description)</code> +<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setJobGroup(java.lang.String,%20java.lang.String,%20boolean)">setJobGroup</a></strong>(String groupId, + String description, + boolean interruptOnCancel)</code> +<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setLocalProperty(java.lang.String,%20java.lang.String)">setLocalProperty</a></strong>(String key, + String value)</code> +<div class="block">Set a local property that affects jobs submitted from this thread, and all child + threads, such as the Spark fair scheduler pool.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setLogLevel(java.lang.String)">setLogLevel</a></strong>(String logLevel)</code> +<div class="block">Control our logLevel.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sparkUser()">sparkUser</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>Long</code></td> +<td class="colLast"><code><strong><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#startTime()">startTime</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaSparkStatusTracker.html" title="class in org.apache.spark.api.java">JavaSparkStatusTracker</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#statusTracker()">statusTracker</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#stop()">stop</a></strong>()</code> +<div class="block">Shut down the SparkContext.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#textFile(java.lang.String)">textFile</a></strong>(String path)</code> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#textFile(java.lang.String,%20int)">textFile</a></strong>(String path, + int minPartitions)</code> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>static <a 
href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#toSparkContext(org.apache.spark.api.java.JavaSparkContext)">toSparkContext</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a> jsc)</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union(org.apache.spark.api.java.JavaDoubleRDD...)">union</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a>... rdds)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union(org.apache.spark.api.java.JavaDoubleRDD,%20java.util.List)">union</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a>> rest)</code> +<div class="block">Build the union of two or more RDDs.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td 
class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union(org.apache.spark.api.java.JavaPairRDD...)">union</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V>... rdds)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union(org.apache.spark.api.java.JavaPairRDD,%20java.util.List)">union</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V>> rest)</code> +<div class="block">Build the union of two or more RDDs.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union(org.apache.spark.api.java.JavaRDD...)">union</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T>... 
rdds)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union(org.apache.spark.api.java.JavaRDD,%20java.util.List)">union</a></strong>(<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T>> rest)</code> +<div class="block">Build the union of two or more RDDs.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#version()">version</a></strong>()</code> +<div class="block">The version of Spark on which this application is running.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String></code></td> +<td class="colLast"><code><strong><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#wholeTextFiles(java.lang.String)">wholeTextFiles</a></strong>(String path)</code> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String></code></td> +<td class="colLast"><code><strong><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#wholeTextFiles(java.lang.String,%20int)">wholeTextFiles</a></strong>(String path, + int minPartitions)</code> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI.</div> +</td> +</tr> +</table> +<ul class="blockList"> +<li class="blockList"><a name="methods_inherited_from_class_Object"> +<!-- --> +</a> +<h3>Methods inherited from class Object</h3> +<code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> +</ul> +</li> +</ul> +</li> +</ul> +</div> +<div class="details"> +<ul class="blockList"> +<li class="blockList"> +<!-- ========= CONSTRUCTOR DETAIL ======== --> +<ul class="blockList"> +<li class="blockList"><a name="constructor_detail"> +<!-- --> +</a> +<h3>Constructor Detail</h3> +<a name="JavaSparkContext(org.apache.spark.SparkContext)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</pre> +</li> +</ul> +<a name="JavaSparkContext()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext()</pre> +<div class="block">Create a JavaSparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit).</div> +</li> +</ul> +<a name="JavaSparkContext(org.apache.spark.SparkConf)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(<a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</pre> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - a <a href="../../../../../org/apache/spark/SparkConf.html" title="class in 
org.apache.spark"><code>SparkConf</code></a> object specifying Spark parameters</dd></dl> +</li> +</ul> +<a name="JavaSparkContext(java.lang.String, java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName)</pre> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd></dl> +</li> +</ul> +<a name="JavaSparkContext(java.lang.String, java.lang.String, org.apache.spark.SparkConf)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + <a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</pre> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd><dd><code>conf</code> - a <a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><code>SparkConf</code></a> object specifying other Spark parameters</dd></dl> +</li> +</ul> +<a name="JavaSparkContext(java.lang.String, java.lang.String, java.lang.String, java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + String sparkHome, + String jarFile)</pre> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. 
mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd><dd><code>sparkHome</code> - The SPARK_HOME directory on the slave nodes</dd><dd><code>jarFile</code> - JAR file to send to the cluster. This can be a path on the local file system + or an HDFS, HTTP, HTTPS, or FTP URL.</dd></dl> +</li> +</ul> +<a name="JavaSparkContext(java.lang.String, java.lang.String, java.lang.String, java.lang.String[])"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + String sparkHome, + String[] jars)</pre> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd><dd><code>sparkHome</code> - The SPARK_HOME directory on the slave nodes</dd><dd><code>jars</code> - Collection of JARs to send to the cluster. These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs.</dd></dl> +</li> +</ul> +<a name="JavaSparkContext(java.lang.String, java.lang.String, java.lang.String, java.lang.String[], java.util.Map)"> +<!-- --> +</a> +<ul class="blockListLast"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + String sparkHome, + String[] jars, + java.util.Map<String,String> environment)</pre> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd><dd><code>sparkHome</code> - The SPARK_HOME directory on the slave nodes</dd><dd><code>jars</code> - Collection of JARs to send to the cluster. 
These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs.</dd><dd><code>environment</code> - Environment variables to set on worker nodes</dd></dl> +</li> +</ul> +</li> +</ul> +<!-- ============ METHOD DETAIL ========== --> +<ul class="blockList"> +<li class="blockList"><a name="method_detail"> +<!-- --> +</a> +<h3>Method Detail</h3> +<a name="fromSparkContext(org.apache.spark.SparkContext)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>fromSparkContext</h4> +<pre>public static <a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a> fromSparkContext(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</pre> +</li> +</ul> +<a name="toSparkContext(org.apache.spark.api.java.JavaSparkContext)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>toSparkContext</h4> +<pre>public static <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> toSparkContext(<a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a> jsc)</pre> +</li> +</ul> +<a name="jarOfClass(java.lang.Class)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jarOfClass</h4> +<pre>public static String[] jarOfClass(Class<?> cls)</pre> +<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass + their JARs to SparkContext.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>cls</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="jarOfObject(java.lang.Object)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jarOfObject</h4> +<pre>public static String[] jarOfObject(Object obj)</pre> +<div 
class="block">Find the JAR that contains the class of a particular object, to make it easy for users + to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in + your driver program.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>obj</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="sc()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sc</h4> +<pre>public <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc()</pre> +</li> +</ul> +<a name="statusTracker()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>statusTracker</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaSparkStatusTracker.html" title="class in org.apache.spark.api.java">JavaSparkStatusTracker</a> statusTracker()</pre> +</li> +</ul> +<a name="isLocal()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>isLocal</h4> +<pre>public Boolean isLocal()</pre> +</li> +</ul> +<a name="sparkUser()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sparkUser</h4> +<pre>public String sparkUser()</pre> +</li> +</ul> +<a name="master()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>master</h4> +<pre>public String master()</pre> +</li> +</ul> +<a name="appName()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>appName</h4> +<pre>public String appName()</pre> +</li> +</ul> +<a name="jars()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jars</h4> +<pre>public java.util.List<String> jars()</pre> +</li> +</ul> +<a name="startTime()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>startTime</h4> +<pre>public Long startTime()</pre> +</li> +</ul> +<a name="version()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> 
+<h4>version</h4> +<pre>public String version()</pre> +<div class="block">The version of Spark on which this application is running.</div> +</li> +</ul> +<a name="defaultParallelism()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>defaultParallelism</h4> +<pre>public Integer defaultParallelism()</pre> +<div class="block">Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD).</div> +</li> +</ul> +<a name="defaultMinPartitions()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>defaultMinPartitions</h4> +<pre>public Integer defaultMinPartitions()</pre> +<div class="block">Default min number of partitions for Hadoop RDDs when not given by user</div> +</li> +</ul> +<a name="parallelize(java.util.List, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelize</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> parallelize(java.util.List<T> list, + int numSlices)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="emptyRDD()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>emptyRDD</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> emptyRDD()</pre> +<div class="block">Get an RDD that has no partitions or elements.</div> +</li> +</ul> +<a name="parallelize(java.util.List)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelize</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> parallelize(java.util.List<T> list)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizePairs(java.util.List, int)"> +<!-- --> +</a> +<ul 
class="blockList"> +<li class="blockList"> +<h4>parallelizePairs</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> parallelizePairs(java.util.List<scala.Tuple2<K,V>> list, + int numSlices)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizePairs(java.util.List)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizePairs</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> parallelizePairs(java.util.List<scala.Tuple2<K,V>> list)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizeDoubles(java.util.List, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizeDoubles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a> parallelizeDoubles(java.util.List<Double> list, + int numSlices)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizeDoubles(java.util.List)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizeDoubles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a> parallelizeDoubles(java.util.List<Double> list)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="textFile(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>textFile</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String> textFile(String 
path)</pre> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="textFile(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>textFile</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String> textFile(String path, + int minPartitions)</pre> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="wholeTextFiles(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>wholeTextFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String> wholeTextFiles(String path, + int minPartitions)</pre> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI. Each file is read as a single record and returned in a + key-value pair, where the key is the path of each file, the value is the content of each file. + <p> + <p> For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... 
+ hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <pre><code> + JavaPairRDD<String, String> rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path") + </code></pre> + <p> + <p> then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... + (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>minPartitions</code> - A suggested minimum number of partitions for the input data.</dd><dd><code>path</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Small files are preferred; large files are also allowed, but may cause bad performance. + <p></dd></dl> +</li> +</ul> +<a name="wholeTextFiles(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>wholeTextFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String> wholeTextFiles(String path)</pre> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI. Each file is read as a single record and returned in a + key-value pair, where the key is the path of each file, the value is the content of each file. 
+ <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">See Also:</span></dt><dd><code>wholeTextFiles(path: String, minPartitions: Int)</code>.</dd></dl> +</li> +</ul> +<a name="binaryFiles(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>> binaryFiles(String path, + int minPartitions)</pre> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array. Each file is read as a single + record and returned in a key-value pair, where the key is the path of each file, + the value is the content of each file. + <p> + For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... + hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <pre><code> + JavaPairRDD<String, PortableDataStream> rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path") + </code></pre> + <p> + then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... 
+ (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>minPartitions</code> - A suggested minimum number of splits for the input data.</dd><dd><code>path</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Small files are preferred; very large files may cause bad performance. + <p></dd></dl> +</li> +</ul> +<a name="binaryFiles(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>> binaryFiles(String path)</pre> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array. Each file is read as a single + record and returned in a key-value pair, where the key is the path of each file, + the value is the content of each file. + <p> + For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... + hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <pre><code> + JavaPairRDD<String, PortableDataStream> rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path") + </code></pre> + <p> + then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... 
+ (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Small files are preferred; very large files may cause bad performance.</dd></dl> +</li> +</ul> +<a name="binaryRecords(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryRecords</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><byte[]> binaryRecords(String path, + int recordLength)</pre> +<div class="block">Load data from a flat binary file, assuming the length of each record is constant. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - Directory to the input data files</dd><dd><code>recordLength</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>An RDD of data with values, represented as byte arrays</dd></dl> +</li> +</ul> +<a name="sequenceFile(java.lang.String, java.lang.Class, java.lang.Class, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sequenceFile</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> sequenceFile(String path, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types. 
+ <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. + If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="sequenceFile(java.lang.String, java.lang.Class, java.lang.Class)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sequenceFile</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> sequenceFile(String path, + Class<K> keyClass, + Class<V> valueClass)</pre> +<div class="block">Get an RDD for a Hadoop SequenceFile. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="objectFile(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>objectFile</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> objectFile(String path, + int minPartitions)</pre> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition. This is still an experimental storage + format and may not be supported exactly as is in future Spark releases. It will also be pretty + slow if you use the default serializer (Java serialization), though the nice thing about it is + that there's very little effort required to save arbitrary objects.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="objectFile(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>objectFile</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> objectFile(String path)</pre> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition. This is still an experimental storage + format and may not be supported exactly as is in future Spark releases. 
It will also be pretty + slow if you use the default serializer (Java serialization), though the nice thing about it is + that there's very little effort required to save arbitrary objects.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="hadoopRDD(org.apache.hadoop.mapred.JobConf, java.lang.Class, java.lang.Class, java.lang.Class, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopRDD</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopRDD(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable, + etc). + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast. + Therefore if you plan to reuse this conf to create multiple RDDs, you need to make + sure you won't modify the conf. A safe approach is always creating a new conf for + a new RDD.</dd><dd><code>inputFormatClass</code> - Class of the InputFormat</dd><dd><code>keyClass</code> - Class of the keys</dd><dd><code>valueClass</code> - Class of the values</dd><dd><code>minPartitions</code> - Minimum number of Hadoop Splits to generate. 
+ <p></dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. + If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="hadoopRDD(org.apache.hadoop.mapred.JobConf, java.lang.Class, java.lang.Class, java.lang.Class)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopRDD</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopRDD(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</pre> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable, + etc). + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast. + Therefore if you plan to reuse this conf to create multiple RDDs, you need to make + sure you won't modify the conf. A safe approach is always creating a new conf for + a new RDD.</dd><dd><code>inputFormatClass</code> - Class of the InputFormat</dd><dd><code>keyClass</code> - Class of the keys</dd><dd><code>valueClass</code> - Class of the values + <p></dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="hadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopFile(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>inputFormatClass</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="hadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopFile(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</pre> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>inputFormatClass</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="newAPIHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, org.apache.hadoop.conf.Configuration)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>newAPIHadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> newAPIHadoopFile(String path, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass, + org.apache.hadoop.conf.Configuration conf)</pre> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>fClass</code> - (undocumented)</dd><dd><code>kClass</code> - (undocumented)</dd><dd><code>vClass</code> - (undocumented)</dd><dd><code>conf</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="newAPIHadoopRDD(org.apache.hadoop.conf.Configuration, java.lang.Class, java.lang.Class, java.lang.Class)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>newAPIHadoopRDD</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> newAPIHadoopRDD(org.apache.hadoop.conf.Configuration conf, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass)</pre> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - Configuration for setting up the dataset. Note: This will be put into a Broadcast. + Therefore if you plan to reuse this conf to create multiple RDDs, you need to make + sure you won't modify the conf. A safe approach is always creating a new conf for + a new RDD.</dd><dd><code>fClass</code> - Class of the InputFormat</dd><dd><code>kClass</code> - Class of the keys</dd><dd><code>vClass</code> - Class of the values + <p></dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="union(org.apache.spark.api.java.JavaRDD, java.util.List)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>union</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> union(<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a
<TRUNCATED> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org