http://git-wip-us.apache.org/repos/asf/spark-website/blob/3a634959/site/docs/2.1.3/api/java/org/apache/spark/SparkContext.html ---------------------------------------------------------------------- diff --git a/site/docs/2.1.3/api/java/org/apache/spark/SparkContext.html b/site/docs/2.1.3/api/java/org/apache/spark/SparkContext.html new file mode 100644 index 0000000..9a40fd6 --- /dev/null +++ b/site/docs/2.1.3/api/java/org/apache/spark/SparkContext.html @@ -0,0 +1,2523 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<!-- NewPage --> +<html lang="en"> +<head> +<!-- Generated by javadoc (version 1.7.0_80) on Tue Jun 26 17:20:44 UTC 2018 --> +<title>SparkContext (Spark 2.1.3 JavaDoc)</title> +<meta name="date" content="2018-06-26"> +<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style"> +</head> +<body> +<script type="text/javascript"><!-- + if (location.href.indexOf('is-external=true') == -1) { + parent.document.title="SparkContext (Spark 2.1.3 JavaDoc)"; + } +//--> +</script> +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<!-- ========= START OF TOP NAVBAR ======= --> +<div class="topNav"><a name="navbar_top"> +<!-- --> +</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow"> +<!-- --> +</a> +<ul class="navList" title="Navigation"> +<li><a href="../../../overview-summary.html">Overview</a></li> +<li><a href="package-summary.html">Package</a></li> +<li class="navBarCell1Rev">Class</li> +<li><a href="package-tree.html">Tree</a></li> +<li><a href="../../../deprecated-list.html">Deprecated</a></li> +<li><a href="../../../index-all.html">Index</a></li> +<li><a href="../../../help-doc.html">Help</a></li> +</ul> +</div> +<div class="subNav"> +<ul class="navList"> +<li><a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><span class="strong">Prev Class</span></a></li> +<li><a href="../../../org/apache/spark/SparkEnv.html" title="class in org.apache.spark"><span class="strong">Next Class</span></a></li> +</ul> +<ul class="navList"> +<li><a href="../../../index.html?org/apache/spark/SparkContext.html" target="_top">Frames</a></li> +<li><a href="SparkContext.html" target="_top">No Frames</a></li> +</ul> +<ul class="navList" id="allclasses_navbar_top"> +<li><a href="../../../allclasses-noframe.html">All Classes</a></li> +</ul> +<div> +<script type="text/javascript"><!-- + allClassesLink = document.getElementById("allclasses_navbar_top"); + if(window==top) { + allClassesLink.style.display = "block"; + } + else { + allClassesLink.style.display = "none"; + } + //--> +</script> +</div> +<div> +<ul class="subNavList"> +<li>Summary: </li> +<li>Nested | </li> +<li>Field | </li> +<li><a href="#constructor_summary">Constr</a> | </li> +<li><a href="#method_summary">Method</a></li> +</ul> +<ul class="subNavList"> +<li>Detail: </li> +<li>Field | </li> +<li><a href="#constructor_detail">Constr</a> | </li> +<li><a href="#method_detail">Method</a></li> +</ul> +</div> +<a name="skip-navbar_top"> +<!-- --> +</a></div> +<!-- ========= END OF TOP NAVBAR ========= --> +<!-- ======== START OF CLASS DATA ======== --> +<div class="header"> +<div class="subTitle">org.apache.spark</div> +<h2 title="Class SparkContext" class="title">Class SparkContext</h2> +</div> +<div class="contentContainer"> +<ul class="inheritance"> +<li>Object</li> +<li> +<ul class="inheritance"> +<li>org.apache.spark.SparkContext</li> +</ul> +</li> +</ul> +<div 
class="description"> +<ul class="blockList"> +<li class="blockList"> +<hr> +<br> +<pre>public class <span class="strong">SparkContext</span> +extends Object</pre> +<div class="block">Main entry point for Spark functionality. A SparkContext represents the connection to a Spark + cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. + <p> + Only one SparkContext may be active per JVM. You must <code>stop()</code> the active SparkContext before + creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + <p> + param: config a Spark Config object describing the application configuration. Any settings in + this config overrides the default configs as well as system properties.</div> +</li> +</ul> +</div> +<div class="summary"> +<ul class="blockList"> +<li class="blockList"> +<!-- ======== CONSTRUCTOR SUMMARY ======== --> +<ul class="blockList"> +<li class="blockList"><a name="constructor_summary"> +<!-- --> +</a> +<h3>Constructor Summary</h3> +<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> +<caption><span>Constructors</span><span class="tabEnd"> </span></caption> +<tr> +<th class="colOne" scope="col">Constructor and Description</th> +</tr> +<tr class="altColor"> +<td class="colOne"><code><strong><a href="../../../org/apache/spark/SparkContext.html#SparkContext()">SparkContext</a></strong>()</code> +<div class="block">Create a SparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit).</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><strong><a href="../../../org/apache/spark/SparkContext.html#SparkContext(org.apache.spark.SparkConf)">SparkContext</a></strong>(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</code> </td> +</tr> +<tr class="altColor"> +<td class="colOne"><code><strong><a href="../../../org/apache/spark/SparkContext.html#SparkContext(java.lang.String,%20java.lang.String,%20org.apache.spark.SparkConf)">SparkContext</a></strong>(String master, + String appName, + <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</code> +<div class="block">Alternative constructor that allows setting common Spark properties directly</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><strong><a href="../../../org/apache/spark/SparkContext.html#SparkContext(java.lang.String,%20java.lang.String,%20java.lang.String,%20scala.collection.Seq,%20scala.collection.Map)">SparkContext</a></strong>(String master, + String appName, + String sparkHome, + scala.collection.Seq<String> jars, + scala.collection.Map<String,String> environment)</code> +<div class="block">Alternative constructor that allows setting common Spark properties directly</div> +</td> +</tr> +</table> +</li> +</ul> +<!-- ========== METHOD SUMMARY =========== --> +<ul class="blockList"> +<li class="blockList"><a name="method_summary"> +<!-- --> +</a> +<h3>Method Summary</h3> +<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> +<caption><span>Methods</span><span class="tabEnd"> </span></caption> +<tr> +<th class="colFirst" scope="col">Modifier and Type</th> +<th class="colLast" scope="col">Method and Description</th> +</tr> +<tr class="altColor"> +<td 
class="colFirst"><code><R,T> <a href="../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><R,T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#accumulable(R,%20org.apache.spark.AccumulableParam)">accumulable</a></strong>(R initialValue, + <a href="../../../org/apache/spark/AccumulableParam.html" title="interface in org.apache.spark">AccumulableParam</a><R,T> param)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><R,T> <a href="../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><R,T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#accumulable(R,%20java.lang.String,%20org.apache.spark.AccumulableParam)">accumulable</a></strong>(R initialValue, + String name, + <a href="../../../org/apache/spark/AccumulableParam.html" title="interface in org.apache.spark">AccumulableParam</a><R,T> param)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><R,T> <a href="../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><R,T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#accumulableCollection(R,%20scala.Function1,%20scala.reflect.ClassTag)">accumulableCollection</a></strong>(R initialValue, + scala.Function1<R,scala.collection.generic.Growable<T>> evidence$9, + scala.reflect.ClassTag<R> evidence$10)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#accumulator(T,%20org.apache.spark.AccumulatorParam)">accumulator</a></strong>(T initialValue, + <a href="../../../org/apache/spark/AccumulatorParam.html" title="interface in org.apache.spark">AccumulatorParam</a><T> param)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#accumulator(T,%20java.lang.String,%20org.apache.spark.AccumulatorParam)">accumulator</a></strong>(T initialValue, + String name, + <a href="../../../org/apache/spark/AccumulatorParam.html" title="interface in org.apache.spark">AccumulatorParam</a><T> param)</code> +<div class="block"><strong>Deprecated.</strong> +<div class="block"><i>use AccumulatorV2. 
Since 2.0.0.</i></div> +</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#addFile(java.lang.String)">addFile</a></strong>(String path)</code> +<div class="block">Add a file to be downloaded with this Spark job on every node.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#addFile(java.lang.String,%20boolean)">addFile</a></strong>(String path, + boolean recursive)</code> +<div class="block">Add a file to be downloaded with this Spark job on every node.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#addJar(java.lang.String)">addJar</a></strong>(String path)</code> +<div class="block">Adds a JAR dependency for all tasks to be executed on this SparkContext in the future.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#addSparkListener(org.apache.spark.scheduler.SparkListenerInterface)">addSparkListener</a></strong>(org.apache.spark.scheduler.SparkListenerInterface listener)</code> +<div class="block">:: DeveloperApi :: + Register a listener to receive up-calls from events that happen during execution.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.Option<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#applicationAttemptId()">applicationAttemptId</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#applicationId()">applicationId</a></strong>()</code> +<div class="block">A unique identifier for the Spark application.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#appName()">appName</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,<a href="../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#binaryFiles(java.lang.String,%20int)">binaryFiles</a></strong>(String path, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file + (useful for binary data)</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><byte[]></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#binaryRecords(java.lang.String,%20int,%20org.apache.hadoop.conf.Configuration)">binaryRecords</a></strong>(String path, + int recordLength, + org.apache.hadoop.conf.Configuration conf)</code> +<div class="block">Load data from a flat binary file, assuming the length of each record is constant.</div> +</td> +</tr> +<tr class="altColor"> +<td 
class="colFirst"><code><T> <a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#broadcast(T,%20scala.reflect.ClassTag)">broadcast</a></strong>(T value, + scala.reflect.ClassTag<T> evidence$11)</code> +<div class="block">Broadcast a read-only variable to the cluster, returning a + <a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#cancelAllJobs()">cancelAllJobs</a></strong>()</code> +<div class="block">Cancel all jobs that have been scheduled or are running.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#cancelJob(int)">cancelJob</a></strong>(int jobId)</code> +<div class="block">Cancel a given job if it's scheduled or running.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#cancelJobGroup(java.lang.String)">cancelJobGroup</a></strong>(String groupId)</code> +<div class="block">Cancel active jobs for the specified group.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#cancelStage(int)">cancelStage</a></strong>(int stageId)</code> +<div class="block">Cancel a given stage and all jobs associated with it.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#clearCallSite()">clearCallSite</a></strong>()</code> +<div class="block">Clear the thread-local property for overriding the call sites + of actions and RDDs.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#clearJobGroup()">clearJobGroup</a></strong>()</code> +<div class="block">Clear the current thread's job group ID and its description.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#collectionAccumulator()">collectionAccumulator</a></strong>()</code> +<div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates + inputs by adding them into the list.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#collectionAccumulator(java.lang.String)">collectionAccumulator</a></strong>(String name)</code> +<div class="block">Create and register a <code>CollectionAccumulator</code>, which 
starts with empty list and accumulates + inputs by adding them into the list.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>int</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#defaultMinPartitions()">defaultMinPartitions</a></strong>()</code> +<div class="block">Default min number of partitions for Hadoop RDDs when not given by user + Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>int</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#defaultParallelism()">defaultParallelism</a></strong>()</code> +<div class="block">Default level of parallelism to use when not given by user (e.g.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#deployMode()">deployMode</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#doubleAccumulator()">doubleAccumulator</a></strong>()</code> +<div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#doubleAccumulator(java.lang.String)">doubleAccumulator</a></strong>(String name)</code> +<div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#emptyRDD(scala.reflect.ClassTag)">emptyRDD</a></strong>(scala.reflect.ClassTag<T> evidence$8)</code> +<div class="block">Get an RDD that has no partitions or elements.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.collection.Seq<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#files()">files</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>scala.collection.Seq<org.apache.spark.scheduler.Schedulable></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getAllPools()">getAllPools</a></strong>()</code> +<div class="block">:: DeveloperApi :: + Return pools for fair scheduler</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.Option<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getCheckpointDir()">getCheckpointDir</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a></code></td> +<td class="colLast"><code><strong><a 
href="../../../org/apache/spark/SparkContext.html#getConf()">getConf</a></strong>()</code> +<div class="block">Return a copy of this SparkContext's configuration.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.collection.Map<String,scala.Tuple2<Object,Object>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getExecutorMemoryStatus()">getExecutorMemoryStatus</a></strong>()</code> +<div class="block">Return a map from the slave to the max memory available for caching and the remaining + memory available for caching.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/storage/StorageStatus.html" title="class in org.apache.spark.storage">StorageStatus</a>[]</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getExecutorStorageStatus()">getExecutorStorageStatus</a></strong>()</code> +<div class="block">:: DeveloperApi :: + Return information about blocks stored in all of the slaves</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getLocalProperty(java.lang.String)">getLocalProperty</a></strong>(String key)</code> +<div class="block">Get a local property set in this thread, or null if it is missing.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getOrCreate()">getOrCreate</a></strong>()</code> +<div class="block">This function may be used to get or instantiate a SparkContext and register it as a + singleton object.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getOrCreate(org.apache.spark.SparkConf)">getOrCreate</a></strong>(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</code> +<div class="block">This function may be used to get or instantiate a SparkContext and register it as a + singleton object.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>scala.collection.Map<Object,<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><?>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getPersistentRDDs()">getPersistentRDDs</a></strong>()</code> +<div class="block">Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.Option<org.apache.spark.scheduler.Schedulable></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getPoolForName(java.lang.String)">getPoolForName</a></strong>(String pool)</code> +<div class="block">:: DeveloperApi :: + Return the pool associated with the given name, if one exists</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/storage/RDDInfo.html" title="class in org.apache.spark.storage">RDDInfo</a>[]</code></td> +<td 
class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getRDDStorageInfo()">getRDDStorageInfo</a></strong>()</code> +<div class="block">:: DeveloperApi :: + Return information about what RDDs are cached, if they are in mem or on disk, how much space + they take, etc.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.Enumeration.Value</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#getSchedulingMode()">getSchedulingMode</a></strong>()</code> +<div class="block">Return current scheduling mode</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>org.apache.hadoop.conf.Configuration</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#hadoopConfiguration()">hadoopConfiguration</a></strong>()</code> +<div class="block">A default Hadoop Configuration for the Hadoop code (e.g.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#hadoopFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class,%20int)">hadoopFile</a></strong>(String path, + Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#hadoopFile(java.lang.String,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag)">hadoopFile</a></strong>(String path, + scala.reflect.ClassTag<K> km, + scala.reflect.ClassTag<V> vm, + scala.reflect.ClassTag<F> fm)</code> +<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, + values and the InputFormat so that users don't need to pass them directly.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#hadoopFile(java.lang.String,%20int,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag)">hadoopFile</a></strong>(String path, + int minPartitions, + scala.reflect.ClassTag<K> km, + scala.reflect.ClassTag<V> vm, + scala.reflect.ClassTag<F> fm)</code> +<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, + values and the InputFormat so that users don't need to pass them directly.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a 
href="../../../org/apache/spark/SparkContext.html#hadoopRDD(org.apache.hadoop.mapred.JobConf,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class,%20int)">hadoopRDD</a></strong>(org.apache.hadoop.mapred.JobConf conf, + Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other + necessary info (e.g.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>boolean</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#isLocal()">isLocal</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>boolean</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#isStopped()">isStopped</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>static scala.Option<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#jarOfClass(java.lang.Class)">jarOfClass</a></strong>(Class<?> cls)</code> +<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass + their JARs to SparkContext.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>static scala.Option<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#jarOfObject(java.lang.Object)">jarOfObject</a></strong>(Object obj)</code> +<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users + to pass their JARs to SparkContext.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.collection.Seq<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#jars()">jars</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>boolean</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#killExecutor(java.lang.String)">killExecutor</a></strong>(String executorId)</code> +<div class="block">:: DeveloperApi :: + Request that the cluster manager kill the specified executor.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>boolean</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#killExecutors(scala.collection.Seq)">killExecutors</a></strong>(scala.collection.Seq<String> executorIds)</code> +<div class="block">:: DeveloperApi :: + Request that the cluster manager kill the specified executors.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>scala.collection.Seq<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#listFiles()">listFiles</a></strong>()</code> +<div class="block">Returns a list of file paths that are added to resources.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>scala.collection.Seq<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#listJars()">listJars</a></strong>()</code> +<div class="block">Returns a list of jar files that are added to resources.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in 
org.apache.spark.util">LongAccumulator</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#longAccumulator()">longAccumulator</a></strong>()</code> +<div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#longAccumulator(java.lang.String)">longAccumulator</a></strong>(String name)</code> +<div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#makeRDD(scala.collection.Seq,%20int,%20scala.reflect.ClassTag)">makeRDD</a></strong>(scala.collection.Seq<T> seq, + int numSlices, + scala.reflect.ClassTag<T> evidence$2)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#makeRDD(scala.collection.Seq,%20scala.reflect.ClassTag)">makeRDD</a></strong>(scala.collection.Seq<scala.Tuple2<T,scala.collection.Seq<String>>> seq, + scala.reflect.ClassTag<T> evidence$3)</code> +<div class="block">Distribute a local Scala collection to form an RDD, with one or more + location preferences (hostnames of Spark nodes) for each object.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#master()">master</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class,%20org.apache.hadoop.conf.Configuration)">newAPIHadoopFile</a></strong>(String path, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass, + org.apache.hadoop.conf.Configuration conf)</code> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopFile(java.lang.String,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag)">newAPIHadoopFile</a></strong>(String path, + scala.reflect.ClassTag<K> km, + scala.reflect.ClassTag<V> vm, + 
scala.reflect.ClassTag<F> fm)</code> +<div class="block">Get an RDD for a Hadoop file with an arbitrary new API InputFormat.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopRDD(org.apache.hadoop.conf.Configuration,%20java.lang.Class,%20java.lang.Class,%20java.lang.Class)">newAPIHadoopRDD</a></strong>(org.apache.hadoop.conf.Configuration conf, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass)</code> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#objectFile(java.lang.String,%20int,%20scala.reflect.ClassTag)">objectFile</a></strong>(String path, + int minPartitions, + scala.reflect.ClassTag<T> evidence$4)</code> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#parallelize(scala.collection.Seq,%20int,%20scala.reflect.ClassTag)">parallelize</a></strong>(scala.collection.Seq<T> seq, + int numSlices, + scala.reflect.ClassTag<T> evidence$1)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#range(long,%20long,%20long,%20int)">range</a></strong>(long start, + long end, + long step, + int numSlices)</code> +<div class="block">Creates a new RDD[Long] containing elements from <code>start</code> to <code>end</code>(exclusive), increased by + <code>step</code> every element.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#register(org.apache.spark.util.AccumulatorV2)">register</a></strong>(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a><?,?> acc)</code> +<div class="block">Register the given accumulator.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#register(org.apache.spark.util.AccumulatorV2,%20java.lang.String)">register</a></strong>(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a><?,?> acc, + String name)</code> +<div class="block">Register the given accumulator with given name.</div> +</td> +</tr> +<tr class="rowColor"> +<td 
class="colFirst"><code>boolean</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#requestExecutors(int)">requestExecutors</a></strong>(int numAdditionalExecutors)</code> +<div class="block">:: DeveloperApi :: + Request an additional number of executors from the cluster manager.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>boolean</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#requestTotalExecutors(int,%20int,%20scala.collection.immutable.Map)">requestTotalExecutors</a></strong>(int numExecutors, + int localityAwareTasks, + scala.collection.immutable.Map<String,Object> hostToLocalTaskCount)</code> +<div class="block">Update the cluster manager on our scheduling needs.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T,U,R> <a href="../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><R></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runApproximateJob(org.apache.spark.rdd.RDD,%20scala.Function2,%20,%20long)">runApproximateJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, + <any> evaluator, + long timeout)</code> +<div class="block">:: DeveloperApi :: + Run a job that can return approximate results.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T,U> Object</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function1,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function1<scala.collection.Iterator<T>,U> func, + scala.reflect.ClassTag<U> evidence$16)</code> +<div class="block">Run a job on all partitions in an RDD and return the results in an array.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T,U> void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function1,%20scala.Function2,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function1<scala.collection.Iterator<T>,U> processPartition, + scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, + scala.reflect.ClassTag<U> evidence$18)</code> +<div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T,U> Object</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function1,%20scala.collection.Seq,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function1<scala.collection.Iterator<T>,U> func, + scala.collection.Seq<Object> partitions, + scala.reflect.ClassTag<U> evidence$14)</code> +<div class="block">Run a job on a given set of partitions of an RDD, but take a function of type + <code>Iterator[T] 
=> U</code> instead of <code>(TaskContext, Iterator[T]) => U</code>.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T,U> Object</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function2,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, + scala.reflect.ClassTag<U> evidence$15)</code> +<div class="block">Run a job on all partitions in an RDD and return the results in an array.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T,U> void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function2,%20scala.Function2,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> processPartition, + scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, + scala.reflect.ClassTag<U> evidence$17)</code> +<div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T,U> Object</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function2,%20scala.collection.Seq,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, + scala.collection.Seq<Object> partitions, + scala.reflect.ClassTag<U> evidence$13)</code> +<div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T,U> void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#runJob(org.apache.spark.rdd.RDD,%20scala.Function2,%20scala.collection.Seq,%20scala.Function2,%20scala.reflect.ClassTag)">runJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, + scala.collection.Seq<Object> partitions, + scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, + scala.reflect.ClassTag<U> evidence$12)</code> +<div class="block">Run a function on a given set of partitions in an RDD and pass the results to the given + handler function.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a 
href="../../../org/apache/spark/SparkContext.html#sequenceFile(java.lang.String,%20java.lang.Class,%20java.lang.Class)">sequenceFile</a></strong>(String path, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#sequenceFile(java.lang.String,%20java.lang.Class,%20java.lang.Class,%20int)">sequenceFile</a></strong>(String path, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#sequenceFile(java.lang.String,%20int,%20scala.reflect.ClassTag,%20scala.reflect.ClassTag,%20scala.Function0,%20scala.Function0)">sequenceFile</a></strong>(String path, + int minPartitions, + scala.reflect.ClassTag<K> km, + scala.reflect.ClassTag<V> vm, + scala.Function0<org.apache.spark.WritableConverter<K>> kcf, + scala.Function0<org.apache.spark.WritableConverter<V>> vcf)</code> +<div class="block">Version of sequenceFile() for types implicitly convertible to Writables through a + WritableConverter.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#setCallSite(java.lang.String)">setCallSite</a></strong>(String shortCallSite)</code> +<div class="block">Set the thread-local property for overriding the call sites + of actions and RDDs.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#setCheckpointDir(java.lang.String)">setCheckpointDir</a></strong>(String directory)</code> +<div class="block">Set the directory under which RDDs are going to be checkpointed.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#setJobDescription(java.lang.String)">setJobDescription</a></strong>(String value)</code> +<div class="block">Set a human readable description of the current job.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#setJobGroup(java.lang.String,%20java.lang.String,%20boolean)">setJobGroup</a></strong>(String groupId, + String description, + boolean interruptOnCancel)</code> +<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#setLocalProperty(java.lang.String,%20java.lang.String)">setLocalProperty</a></strong>(String key, + String value)</code> +<div class="block">Set a local property that affects jobs 
submitted from this thread, such as the Spark fair + scheduler pool.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#setLogLevel(java.lang.String)">setLogLevel</a></strong>(String logLevel)</code> +<div class="block">Control our logLevel.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#sparkUser()">sparkUser</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>long</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#startTime()">startTime</a></strong>()</code> </td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/SparkStatusTracker.html" title="class in org.apache.spark">SparkStatusTracker</a></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#statusTracker()">statusTracker</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#stop()">stop</a></strong>()</code> +<div class="block">Shut down the SparkContext.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T,U,R> <a href="../../../org/apache/spark/SimpleFutureAction.html" title="class in org.apache.spark">SimpleFutureAction</a><R></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#submitJob(org.apache.spark.rdd.RDD,%20scala.Function1,%20scala.collection.Seq,%20scala.Function2,%20scala.Function0)">submitJob</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, + scala.Function1<scala.collection.Iterator<T>,U> processPartition, + scala.collection.Seq<Object> partitions, + scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, + scala.Function0<R> resultFunc)</code> +<div class="block">Submit a job for execution and return a FutureJob holding the result.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#textFile(java.lang.String,%20int)">textFile</a></strong>(String path, + int minPartitions)</code> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code>scala.Option<String></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#uiWebUrl()">uiWebUrl</a></strong>()</code> </td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#union(org.apache.spark.rdd.RDD,%20scala.collection.Seq,%20scala.reflect.ClassTag)">union</a></strong>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> first, + scala.collection.Seq<<a 
href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T>> rest, + scala.reflect.ClassTag<T> evidence$7)</code> +<div class="block">Build the union of a list of RDDs passed as variable-length arguments.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#union(scala.collection.Seq,%20scala.reflect.ClassTag)">union</a></strong>(scala.collection.Seq<<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T>> rdds, + scala.reflect.ClassTag<T> evidence$6)</code> +<div class="block">Build the union of a list of RDDs.</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#version()">version</a></strong>()</code> +<div class="block">The version of Spark on which this application is running.</div> +</td> +</tr> +<tr class="altColor"> +<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,String>></code></td> +<td class="colLast"><code><strong><a href="../../../org/apache/spark/SparkContext.html#wholeTextFiles(java.lang.String,%20int)">wholeTextFiles</a></strong>(String path, + int minPartitions)</code> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI.</div> +</td> +</tr> +</table> +<ul class="blockList"> +<li class="blockList"><a name="methods_inherited_from_class_Object"> +<!-- --> +</a> +<h3>Methods inherited from class Object</h3> +<code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> +</ul> +</li> +</ul> +</li> +</ul> +</div> +<div class="details"> +<ul class="blockList"> +<li class="blockList"> +<!-- ========= CONSTRUCTOR DETAIL ======== --> +<ul class="blockList"> +<li class="blockList"><a name="constructor_detail"> +<!-- --> +</a> +<h3>Constructor Detail</h3> +<a name="SparkContext(org.apache.spark.SparkConf)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>SparkContext</h4> +<pre>public SparkContext(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</pre> +</li> +</ul> +<a name="SparkContext()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>SparkContext</h4> +<pre>public SparkContext()</pre> +<div class="block">Create a SparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit).</div> +</li> +</ul> +<a name="SparkContext(java.lang.String, java.lang.String, org.apache.spark.SparkConf)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>SparkContext</h4> +<pre>public SparkContext(String master, + String appName, + <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</pre> +<div class="block">Alternative constructor that allows setting common Spark properties directly + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. 
mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd><dd><code>conf</code> - a <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><code>SparkConf</code></a> object specifying other Spark parameters</dd></dl> +</li> +</ul> +<a name="SparkContext(java.lang.String, java.lang.String, java.lang.String, scala.collection.Seq, scala.collection.Map)"> +<!-- --> +</a> +<ul class="blockListLast"> +<li class="blockList"> +<h4>SparkContext</h4> +<pre>public SparkContext(String master, + String appName, + String sparkHome, + scala.collection.Seq<String> jars, + scala.collection.Map<String,String> environment)</pre> +<div class="block">Alternative constructor that allows setting common Spark properties directly + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd><dd><code>appName</code> - A name for your application, to display on the cluster web UI.</dd><dd><code>sparkHome</code> - Location where Spark is installed on cluster nodes.</dd><dd><code>jars</code> - Collection of JARs to send to the cluster. These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs.</dd><dd><code>environment</code> - Environment variables to set on worker nodes.</dd></dl> +</li> +</ul> +</li> +</ul> +<!-- ============ METHOD DETAIL ========== --> +<ul class="blockList"> +<li class="blockList"><a name="method_detail"> +<!-- --> +</a> +<h3>Method Detail</h3> +<a name="getOrCreate(org.apache.spark.SparkConf)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>getOrCreate</h4> +<pre>public static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> getOrCreate(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</pre> +<div class="block">This function may be used to get or instantiate a SparkContext and register it as a + singleton object. Because we can only have one active SparkContext per JVM, + this is useful when applications may wish to share a SparkContext. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>config</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>This function cannot be used to create multiple SparkContext instances + even if multiple contexts are allowed.</dd></dl> +</li> +</ul> +<a name="getOrCreate()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>getOrCreate</h4> +<pre>public static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> getOrCreate()</pre> +<div class="block">This function may be used to get or instantiate a SparkContext and register it as a + singleton object. Because we can only have one active SparkContext per JVM, + this is useful when applications may wish to share a SparkContext. + <p> + This method allows not passing a SparkConf (useful if just retrieving). 
+ <p></div> +<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>This function cannot be used to create multiple SparkContext instances + even if multiple contexts are allowed.</dd></dl> +</li> +</ul> +<a name="jarOfClass(java.lang.Class)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jarOfClass</h4> +<pre>public static scala.Option<String> jarOfClass(Class<?> cls)</pre> +<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass + their JARs to SparkContext.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>cls</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="jarOfObject(java.lang.Object)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jarOfObject</h4> +<pre>public static scala.Option<String> jarOfObject(Object obj)</pre> +<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users + to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in + your driver program.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>obj</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="startTime()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>startTime</h4> +<pre>public long startTime()</pre> +</li> +</ul> +<a name="getConf()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>getConf</h4> +<pre>public <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> getConf()</pre> +<div class="block">Return a copy of this SparkContext's configuration. 
The configuration <i>cannot</i> be + changed at runtime.</div> +<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="jars()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jars</h4> +<pre>public scala.collection.Seq<String> jars()</pre> +</li> +</ul> +<a name="files()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>files</h4> +<pre>public scala.collection.Seq<String> files()</pre> +</li> +</ul> +<a name="master()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>master</h4> +<pre>public String master()</pre> +</li> +</ul> +<a name="deployMode()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>deployMode</h4> +<pre>public String deployMode()</pre> +</li> +</ul> +<a name="appName()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>appName</h4> +<pre>public String appName()</pre> +</li> +</ul> +<a name="isLocal()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>isLocal</h4> +<pre>public boolean isLocal()</pre> +</li> +</ul> +<a name="isStopped()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>isStopped</h4> +<pre>public boolean isStopped()</pre> +<dl><dt><span class="strong">Returns:</span></dt><dd>true if context is stopped or in the midst of stopping.</dd></dl> +</li> +</ul> +<a name="statusTracker()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>statusTracker</h4> +<pre>public <a href="../../../org/apache/spark/SparkStatusTracker.html" title="class in org.apache.spark">SparkStatusTracker</a> statusTracker()</pre> +</li> +</ul> +<a name="uiWebUrl()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>uiWebUrl</h4> +<pre>public scala.Option<String> uiWebUrl()</pre> +</li> +</ul> +<a name="hadoopConfiguration()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopConfiguration</h4> +<pre>public org.apache.hadoop.conf.Configuration hadoopConfiguration()</pre> +<div class="block">A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. + <p></div> +<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>As it will be reused in all Hadoop RDDs, it's better not to modify it unless you + plan to set some global configurations for all Hadoop RDDs.</dd></dl> +</li> +</ul> +<a name="sparkUser()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sparkUser</h4> +<pre>public String sparkUser()</pre> +</li> +</ul> +<a name="applicationId()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>applicationId</h4> +<pre>public String applicationId()</pre> +<div class="block">A unique identifier for the Spark application. + Its format depends on the scheduler implementation. + (e.g. 
+ in case of local spark app something like 'local-1433865536131' + in case of YARN something like 'application_1433865536131_34483' + )</div> +<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="applicationAttemptId()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>applicationAttemptId</h4> +<pre>public scala.Option<String> applicationAttemptId()</pre> +</li> +</ul> +<a name="setLogLevel(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>setLogLevel</h4> +<pre>public void setLogLevel(String logLevel)</pre> +<div class="block">Control our logLevel. This overrides any user-defined log settings.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>logLevel</code> - The desired log level as a string. + Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN</dd></dl> +</li> +</ul> +<a name="setLocalProperty(java.lang.String, java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>setLocalProperty</h4> +<pre>public void setLocalProperty(String key, + String value)</pre> +<div class="block">Set a local property that affects jobs submitted from this thread, such as the Spark fair + scheduler pool. User-defined properties may also be set here. These properties are propagated + through to worker tasks and can be accessed there via + <a href="../../../org/apache/spark/TaskContext.html#getLocalProperty(java.lang.String)"><code>TaskContext.getLocalProperty(java.lang.String)</code></a>. + <p> + These properties are inherited by child threads spawned from this thread. This + may have unexpected consequences when working with thread pools. The standard java + implementation of thread pools have worker threads spawn other worker threads. + As a result, local properties may propagate unpredictably.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>key</code> - (undocumented)</dd><dd><code>value</code> - (undocumented)</dd></dl> +</li> +</ul> +<a name="getLocalProperty(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>getLocalProperty</h4> +<pre>public String getLocalProperty(String key)</pre> +<div class="block">Get a local property set in this thread, or null if it is missing. See + <code>org.apache.spark.SparkContext.setLocalProperty</code>.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>key</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="setJobDescription(java.lang.String)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>setJobDescription</h4> +<pre>public void setJobDescription(String value)</pre> +<div class="block">Set a human readable description of the current job.</div> +</li> +</ul> +<a name="setJobGroup(java.lang.String, java.lang.String, boolean)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>setJobGroup</h4> +<pre>public void setJobGroup(String groupId, + String description, + boolean interruptOnCancel)</pre> +<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared. + <p> + Often, a unit of execution in an application consists of multiple Spark actions or jobs. + Application programmers can use this method to group all those jobs together and give a + group description. 
Once set, the Spark web UI will associate such jobs with this group. + <p> + The application can also use <code>org.apache.spark.SparkContext.cancelJobGroup</code> to cancel all + running jobs in this group. For example, + <pre><code> + // In the main thread: + sc.setJobGroup("some_job_to_cancel", "some job description") + sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(10); i }.count() + + // In a separate thread: + sc.cancelJobGroup("some_job_to_cancel") + </code></pre> + <p> + If interruptOnCancel is set to true for the job group, then job cancellation will result + in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure + that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208, + where HDFS may respond to Thread.interrupt() by marking nodes as dead.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>groupId</code> - (undocumented)</dd><dd><code>description</code> - (undocumented)</dd><dd><code>interruptOnCancel</code> - (undocumented)</dd></dl> +</li> +</ul> +<a name="clearJobGroup()"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>clearJobGroup</h4> +<pre>public void clearJobGroup()</pre> +<div class="block">Clear the current thread's job group ID and its description.</div> +</li> +</ul> +<a name="parallelize(scala.collection.Seq, int, scala.reflect.ClassTag)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelize</h4> +<pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> parallelize(scala.collection.Seq<T> seq, + int numSlices, + scala.reflect.ClassTag<T> evidence$1)</pre> +<div class="block">Distribute a local Scala collection to form an RDD. + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>seq</code> - (undocumented)</dd><dd><code>numSlices</code> - (undocumented)</dd><dd><code>evidence$1</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Parallelize acts lazily. If <code>seq</code> is a mutable collection and is altered after the call + to parallelize and before the first action on the RDD, the resultant RDD will reflect the + modified collection. Pass a copy of the argument to avoid this. Also, avoid using <code>parallelize(Seq())</code> to create an empty <code>RDD</code>. Consider <code>emptyRDD</code> for an + RDD with no partitions, or <code>parallelize(Seq[T]())</code> for an RDD of <code>T</code> with empty partitions.</dd></dl> +</li> +</ul> +<a name="range(long, long, long, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>range</h4> +<pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object> range(long start, + long end, + long step, + int numSlices)</pre> +<div class="block">Creates a new RDD[Long] containing elements from <code>start</code> to <code>end</code> (exclusive), incremented by + <code>step</code> for each element. 
+ <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>start</code> - the start value.</dd><dd><code>end</code> - the end value.</dd><dd><code>step</code> - the incremental step</dd><dd><code>numSlices</code> - the partition number of the new RDD.</dd> +<dt><span class="strong">Returns:</span></dt><dd></dd><dt><span class="strong">Note:</span></dt> + <dd>if we need to cache this RDD, we should make sure each partition does not exceed limit. + <p></dd></dl> +</li> +</ul> +<a name="makeRDD(scala.collection.Seq, int, scala.reflect.ClassTag)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>makeRDD</h4> +<pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> makeRDD(scala.collection.Seq<T> seq, + int numSlices, + scala.reflect.ClassTag<T> evidence$2)</pre> +<div class="block">Distribute a local Scala collection to form an RDD. + <p> + This method is identical to <code>parallelize</code>.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>seq</code> - (undocumented)</dd><dd><code>numSlices</code> - (undocumented)</dd><dd><code>evidence$2</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="makeRDD(scala.collection.Seq, scala.reflect.ClassTag)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>makeRDD</h4> +<pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> makeRDD(scala.collection.Seq<scala.Tuple2<T,scala.collection.Seq<String>>> seq, + scala.reflect.ClassTag<T> evidence$3)</pre> +<div class="block">Distribute a local Scala collection to form an RDD, with one or more + location preferences (hostnames of Spark nodes) for each object. + Create a new partition for each collection item.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>seq</code> - (undocumented)</dd><dd><code>evidence$3</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="textFile(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>textFile</h4> +<pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String> textFile(String path, + int minPartitions)</pre> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl> +</li> +</ul> +<a name="wholeTextFiles(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>wholeTextFiles</h4> +<pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,String>> wholeTextFiles(String path, + int minPartitions)</pre> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI. Each file is read as a single record and returned in a + key-value pair, where the key is the path of each file, the value is the content of each file. 
+ <p> + <p> For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... + hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do <code>val rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")</code>, + <p> + <p> then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... + (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the + list of inputs.</dd><dd><code>minPartitions</code> - A suggestion value of the minimal splitting number for input data.</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Small files are preferred, large files are also allowable, but may cause bad performance. On some filesystems, <code>.../path/&#42;</code> can be a more efficient way to read all files + in a directory rather than <code>.../path/</code> or <code>.../path</code> + <p></dd></dl> +</li> +</ul> +<a name="binaryFiles(java.lang.String, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryFiles</h4> +<pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,<a href="../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>>> binaryFiles(String path, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file + (useful for binary data). + <p> + For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... + hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <code>val rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path")</code>, + <p> + then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... + (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the + list of inputs.</dd><dd><code>minPartitions</code> - A suggestion value of the minimal splitting number for input data.</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Small files are preferred; very large files may cause bad performance. On some filesystems, <code>.../path/&#42;</code> can be a more efficient way to read all files + in a directory rather than <code>.../path/</code> or <code>.../path</code> + <p></dd></dl> +</li> +</ul> +<a name="binaryRecords(java.lang.String, int, org.apache.hadoop.conf.Configuration)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryRecords</h4> +<pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><byte[]> binaryRecords(String path, + int recordLength, + org.apache.hadoop.conf.Configuration conf)</pre> +<div class="block">Load data from a flat binary file, assuming the length of each record is constant. 
+ <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the + list of inputs.</dd><dd><code>recordLength</code> - The length at which to split the records</dd><dd><code>conf</code> - Configuration for setting up the dataset. + <p></dd> +<dt><span class="strong">Returns:</span></dt><dd>An RDD of data with values, represented as byte arrays</dd><dt><span class="strong">Note:</span></dt> + <dd>We ensure that the byte array for each record in the resulting RDD + has the provided record length. + <p></dd></dl> +</li> +</ul> +<a name="hadoopRDD(org.apache.hadoop.mapred.JobConf, java.lang.Class, java.lang.Class, java.lang.Class, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopRDD</h4> +<pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopRDD(org.apache.hadoop.mapred.JobConf conf, + Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other + necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable), + using the older MapReduce API (<code>org.apache.hadoop.mapred</code>). + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast. + Therefore if you plan to reuse this conf to create multiple RDDs, you need to make + sure you won't modify the conf. A safe approach is always creating a new conf for + a new RDD.</dd><dd><code>inputFormatClass</code> - Class of the InputFormat</dd><dd><code>keyClass</code> - Class of the keys</dd><dd><code>valueClass</code> - Class of the values</dd><dd><code>minPartitions</code> - Minimum number of Hadoop Splits to generate. + <p></dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + operation will create many references to the same object. + If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + copy them using a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="hadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, int)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopFile(String path, + Class<? 
extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>inputFormatClass</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + operation will create many references to the same object. + If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + copy them using a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="hadoopFile(java.lang.String, int, scala.reflect.ClassTag, scala.reflect.ClassTag, scala.reflect.ClassTag)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopFile(String path, + int minPartitions, + scala.reflect.ClassTag<K> km, + scala.reflect.ClassTag<V> vm, + scala.reflect.ClassTag<F> fm)</pre> +<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, + values and the InputFormat so that users don't need to pass them directly. Instead, callers + can just write, for example, + <pre><code> + val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minPartitions) + </code></pre> + <p></div> +<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>minPartitions</code> - (undocumented)</dd><dd><code>km</code> - (undocumented)</dd><dd><code>vm</code> - (undocumented)</dd><dd><code>fm</code> - (undocumented)</dd> +<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd><dt><span class="strong">Note:</span></dt> + <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + operation will create many references to the same object. + If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + copy them using a <code>map</code> function.</dd></dl> +</li> +</ul> +<a name="hadoopFile(java.lang.String, scala.reflect.ClassTag, scala.reflect.ClassTag, scala.reflect.ClassTag)"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopFile(String path, + scala.reflect.ClassTag<K> km, + scala.reflect.ClassTag<V> vm, +
<TRUNCATED>
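The method detail ends here because the archived diff is truncated at the hadoopFile overloads. For orientation, a minimal, illustrative Scala sketch of the SparkContext APIs documented above (getOrCreate, range, setJobGroup, version, stop); the local[4] master, application name, and job-group id are made-up values, not taken from this page:

  import org.apache.spark.{SparkConf, SparkContext}

  object SparkContextSketch {
    def main(args: Array[String]): Unit = {
      // Assumed values: a local master with 4 threads and an arbitrary app name.
      val conf = new SparkConf().setMaster("local[4]").setAppName("sparkcontext-sketch")

      // getOrCreate registers the context as the per-JVM singleton described above.
      val sc = SparkContext.getOrCreate(conf)

      // range builds an RDD[Long] of 0, 2, ..., 98 spread over 4 partitions.
      val evens = sc.range(0L, 100L, step = 2L, numSlices = 4)

      // Tag the following job with a group id so it could be cancelled from another thread.
      sc.setJobGroup("sketch-group", "sum the even numbers", interruptOnCancel = false)
      val total = evens.sum()

      println(s"sum = $total on Spark ${sc.version}")
      sc.stop()
    }
  }

As the setJobGroup documentation above describes, a separate thread could call sc.cancelJobGroup("sketch-group") to cancel the grouped job while it runs.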