http://git-wip-us.apache.org/repos/asf/spark-website/blob/04a27dbf/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html ---------------------------------------------------------------------- diff --git a/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html new file mode 100644 index 0000000..7d037c3 --- /dev/null +++ b/site/docs/2.3.2/api/java/org/apache/spark/api/java/JavaSparkContext.html @@ -0,0 +1,2389 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<!-- NewPage --> +<html lang="en"> +<head> +<!-- Generated by javadoc (1.8.0_181) on Mon Sep 17 19:57:34 CST 2018 --> +<title>JavaSparkContext (Spark 2.3.2 JavaDoc)</title> +<meta name="date" content="2018-09-17"> +<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style"> +<script type="text/javascript" src="../../../../../script.js"></script> +</head> +<body> +<script type="text/javascript"><!-- + try { + if (location.href.indexOf('is-external=true') == -1) { + parent.document.title="JavaSparkContext (Spark 2.3.2 JavaDoc)"; + } + } + catch(err) { + } +//--> +var methods = {"i0":42,"i1":42,"i2":42,"i3":42,"i4":42,"i5":42,"i6":42,"i7":42,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":42,"i24":42,"i25":10,"i26":9,"i27":10,"i28":10,"i29":10,"i30":10,"i31":10,"i32":10,"i33":10,"i34":10,"i35":10,"i36":10,"i37":42,"i38":42,"i39":10,"i40":9,"i41":9,"i42":10,"i43":10,"i44":10,"i45":10,"i46":10,"i47":10,"i48":10,"i49":10,"i50":10,"i51":10,"i52":10,"i53":10,"i54":10,"i55":10,"i56":10,"i57":10,"i58":10,"i59":10,"i60":10,"i61":10,"i62":10,"i63":10,"i64":10,"i65":10,"i66":10,"i67":10,"i68":10,"i69":10,"i70":9,"i71":10,"i72":10,"i73":10,"i74":10,"i75":10,"i76":10,"i77":10,"i78":10,"i79":10}; +var tabs = {65535:["t0","All 
Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; +</script> +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<!-- ========= START OF TOP NAVBAR ======= --> +<div class="topNav"><a name="navbar.top"> +<!-- --> +</a> +<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> +<a name="navbar.top.firstrow"> +<!-- --> +</a> +<ul class="navList" title="Navigation"> +<li><a href="../../../../../overview-summary.html">Overview</a></li> +<li><a href="package-summary.html">Package</a></li> +<li class="navBarCell1Rev">Class</li> +<li><a href="package-tree.html">Tree</a></li> +<li><a href="../../../../../deprecated-list.html">Deprecated</a></li> +<li><a href="../../../../../index-all.html">Index</a></li> +<li><a href="../../../../../help-doc.html">Help</a></li> +</ul> +</div> +<div class="subNav"> +<ul class="navList"> +<li><a href="../../../../../org/apache/spark/api/java/JavaRDDLike.html" title="interface in org.apache.spark.api.java"><span class="typeNameLink">Prev Class</span></a></li> +<li><a href="../../../../../org/apache/spark/api/java/JavaSparkStatusTracker.html" title="class in org.apache.spark.api.java"><span class="typeNameLink">Next Class</span></a></li> +</ul> +<ul class="navList"> +<li><a href="../../../../../index.html?org/apache/spark/api/java/JavaSparkContext.html" target="_top">Frames</a></li> +<li><a href="JavaSparkContext.html" target="_top">No Frames</a></li> +</ul> +<ul class="navList" id="allclasses_navbar_top"> +<li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> +</ul> +<div> +<script type="text/javascript"><!-- + allClassesLink = document.getElementById("allclasses_navbar_top"); + if(window==top) { + allClassesLink.style.display = "block"; + } + else { 
+ allClassesLink.style.display = "none"; + } + //--> +</script> +</div> +<div> +<ul class="subNavList"> +<li>Summary: </li> +<li>Nested | </li> +<li>Field | </li> +<li><a href="#constructor.summary">Constr</a> | </li> +<li><a href="#method.summary">Method</a></li> +</ul> +<ul class="subNavList"> +<li>Detail: </li> +<li>Field | </li> +<li><a href="#constructor.detail">Constr</a> | </li> +<li><a href="#method.detail">Method</a></li> +</ul> +</div> +<a name="skip.navbar.top"> +<!-- --> +</a></div> +<!-- ========= END OF TOP NAVBAR ========= --> +<!-- ======== START OF CLASS DATA ======== --> +<div class="header"> +<div class="subTitle">org.apache.spark.api.java</div> +<h2 title="Class JavaSparkContext" class="title">Class JavaSparkContext</h2> +</div> +<div class="contentContainer"> +<ul class="inheritance"> +<li>Object</li> +<li> +<ul class="inheritance"> +<li>org.apache.spark.api.java.JavaSparkContext</li> +</ul> +</li> +</ul> +<div class="description"> +<ul class="blockList"> +<li class="blockList"> +<dl> +<dt>All Implemented Interfaces:</dt> +<dd>java.io.Closeable, AutoCloseable</dd> +</dl> +<hr> +<br> +<pre>public class <span class="typeNameLabel">JavaSparkContext</span> +extends Object +implements java.io.Closeable</pre> +<div class="block">A Java-friendly version of <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark"><code>SparkContext</code></a> that returns + <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java"><code>JavaRDD</code></a>s and works with Java collections instead of Scala ones. + <p> + Only one SparkContext may be active per JVM. You must <code>stop()</code> the active SparkContext before + creating a new one. 
This limitation may eventually be removed; see SPARK-2243 for more details.</div> +</li> +</ul> +</div> +<div class="summary"> +<ul class="blockList"> +<li class="blockList"> +<!-- ======== CONSTRUCTOR SUMMARY ======== --> +<ul class="blockList"> +<li class="blockList"><a name="constructor.summary"> +<!-- --> +</a> +<h3>Constructor Summary</h3> +<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> +<caption><span>Constructors</span><span class="tabEnd"> </span></caption> +<tr> +<th class="colOne" scope="col">Constructor and Description</th> +</tr> +<tr class="altColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext--">JavaSparkContext</a></span>()</code> +<div class="block">Create a JavaSparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit).</div> +</td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-org.apache.spark.SparkConf-">JavaSparkContext</a></span>(<a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</code> </td> +</tr> +<tr class="altColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-org.apache.spark.SparkContext-">JavaSparkContext</a></span>(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-java.lang.String-java.lang.String-">JavaSparkContext</a></span>(String master, + String 
appName)</code> </td> +</tr> +<tr class="altColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-java.lang.String-java.lang.String-org.apache.spark.SparkConf-">JavaSparkContext</a></span>(String master, + String appName, + <a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">JavaSparkContext</a></span>(String master, + String appName, + String sparkHome, + String jarFile)</code> </td> +</tr> +<tr class="altColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-java.lang.String-java.lang.String-java.lang.String-java.lang.String:A-">JavaSparkContext</a></span>(String master, + String appName, + String sparkHome, + String[] jars)</code> </td> +</tr> +<tr class="rowColor"> +<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#JavaSparkContext-java.lang.String-java.lang.String-java.lang.String-java.lang.String:A-java.util.Map-">JavaSparkContext</a></span>(String master, + String appName, + String sparkHome, + String[] jars, + java.util.Map<String,String> environment)</code> </td> +</tr> +</table> +</li> +</ul> +<!-- ========== METHOD SUMMARY =========== --> +<ul class="blockList"> +<li class="blockList"><a name="method.summary"> +<!-- --> +</a> +<h3>Method Summary</h3> +<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> +<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span 
class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span><span id="t6" class="tableTab"><span><a href="javascript:show(32);">Deprecated Methods</a></span><span class="tabEnd"> </span></span></caption> +<tr> +<th class="colFirst" scope="col">Modifier and Type</th> +<th class="colLast" scope="col">Method and Description</th> +</tr> +<tr id="i0" class="altColor"> +<td class="colFirst"><code><T,R> <a href="../../../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><T,R></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulable-T-org.apache.spark.AccumulableParam-">accumulable</a></span>(T initialValue, + <a href="../../../../../org/apache/spark/AccumulableParam.html" title="interface in org.apache.spark">AccumulableParam</a><T,R> param)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use AccumulatorV2. 
Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i1" class="rowColor"> +<td class="colFirst"><code><T,R> <a href="../../../../../org/apache/spark/Accumulable.html" title="class in org.apache.spark">Accumulable</a><T,R></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulable-T-java.lang.String-org.apache.spark.AccumulableParam-">accumulable</a></span>(T initialValue, + String name, + <a href="../../../../../org/apache/spark/AccumulableParam.html" title="interface in org.apache.spark">AccumulableParam</a><T,R> param)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use AccumulatorV2. Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i2" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator-double-">accumulator</a></span>(double initialValue)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().doubleAccumulator(). Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i3" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator-double-java.lang.String-">accumulator</a></span>(double initialValue, + String name)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().doubleAccumulator(String). 
Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i4" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator-int-">accumulator</a></span>(int initialValue)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().longAccumulator(). Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i5" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator-int-java.lang.String-">accumulator</a></span>(int initialValue, + String name)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().longAccumulator(String). Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i6" class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator-T-org.apache.spark.AccumulatorParam-">accumulator</a></span>(T initialValue, + <a href="../../../../../org/apache/spark/AccumulatorParam.html" title="interface in org.apache.spark">AccumulatorParam</a><T> accumulatorParam)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use AccumulatorV2. 
Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i7" class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#accumulator-T-java.lang.String-org.apache.spark.AccumulatorParam-">accumulator</a></span>(T initialValue, + String name, + <a href="../../../../../org/apache/spark/AccumulatorParam.html" title="interface in org.apache.spark">AccumulatorParam</a><T> accumulatorParam)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use AccumulatorV2. Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i8" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#addFile-java.lang.String-">addFile</a></span>(String path)</code> +<div class="block">Add a file to be downloaded with this Spark job on every node.</div> +</td> +</tr> +<tr id="i9" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#addFile-java.lang.String-boolean-">addFile</a></span>(String path, + boolean recursive)</code> +<div class="block">Add a file to be downloaded with this Spark job on every node.</div> +</td> +</tr> +<tr id="i10" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#addJar-java.lang.String-">addJar</a></span>(String path)</code> +<div class="block">Adds a JAR dependency for all tasks to be executed on this SparkContext in the future.</div> +</td> +</tr> 
+<tr id="i11" class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#appName--">appName</a></span>()</code> </td> +</tr> +<tr id="i12" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#binaryFiles-java.lang.String-">binaryFiles</a></span>(String path)</code> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array.</div> +</td> +</tr> +<tr id="i13" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#binaryFiles-java.lang.String-int-">binaryFiles</a></span>(String path, + int minPartitions)</code> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array.</div> +</td> +</tr> +<tr id="i14" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><byte[]></code></td> +<td class="colLast"><code><span 
class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#binaryRecords-java.lang.String-int-">binaryRecords</a></span>(String path, + int recordLength)</code> +<div class="block">Load data from a flat binary file, assuming the length of each record is constant.</div> +</td> +</tr> +<tr id="i15" class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#broadcast-T-">broadcast</a></span>(T value)</code> +<div class="block">Broadcast a read-only variable to the cluster, returning a + <a href="../../../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions.</div> +</td> +</tr> +<tr id="i16" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#cancelAllJobs--">cancelAllJobs</a></span>()</code> +<div class="block">Cancel all jobs that have been scheduled or are running.</div> +</td> +</tr> +<tr id="i17" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#cancelJobGroup-java.lang.String-">cancelJobGroup</a></span>(String groupId)</code> +<div class="block">Cancel active jobs for the specified group.</div> +</td> +</tr> +<tr id="i18" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#clearCallSite--">clearCallSite</a></span>()</code> +<div 
class="block">Pass-through to SparkContext.setCallSite.</div> +</td> +</tr> +<tr id="i19" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#clearJobGroup--">clearJobGroup</a></span>()</code> +<div class="block">Clear the current thread's job group ID and its description.</div> +</td> +</tr> +<tr id="i20" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#close--">close</a></span>()</code> </td> +</tr> +<tr id="i21" class="rowColor"> +<td class="colFirst"><code>Integer</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#defaultMinPartitions--">defaultMinPartitions</a></span>()</code> +<div class="block">Default min number of partitions for Hadoop RDDs when not given by user</div> +</td> +</tr> +<tr id="i22" class="altColor"> +<td class="colFirst"><code>Integer</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#defaultParallelism--">defaultParallelism</a></span>()</code> +<div class="block">Default level of parallelism to use when not given by user (e.g.</div> +</td> +</tr> +<tr id="i23" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#doubleAccumulator-double-">doubleAccumulator</a></span>(double initialValue)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().doubleAccumulator(). 
Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i24" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Double></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#doubleAccumulator-double-java.lang.String-">doubleAccumulator</a></span>(double initialValue, + String name)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().doubleAccumulator(String). Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i25" class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#emptyRDD--">emptyRDD</a></span>()</code> +<div class="block">Get an RDD that has no partitions or elements.</div> +</td> +</tr> +<tr id="i26" class="altColor"> +<td class="colFirst"><code>static <a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#fromSparkContext-org.apache.spark.SparkContext-">fromSparkContext</a></span>(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</code> </td> +</tr> +<tr id="i27" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/Optional.html" title="class in org.apache.spark.api.java">Optional</a><String></code></td> +<td class="colLast"><code><span class="memberNameLink"><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getCheckpointDir--">getCheckpointDir</a></span>()</code> </td> +</tr> +<tr id="i28" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getConf--">getConf</a></span>()</code> +<div class="block">Return a copy of this JavaSparkContext's configuration.</div> +</td> +</tr> +<tr id="i29" class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getLocalProperty-java.lang.String-">getLocalProperty</a></span>(String key)</code> +<div class="block">Get a local property set in this thread, or null if it is missing.</div> +</td> +</tr> +<tr id="i30" class="altColor"> +<td class="colFirst"><code>java.util.Map<Integer,<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><?>></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getPersistentRDDs--">getPersistentRDDs</a></span>()</code> +<div class="block">Returns a Java map of JavaRDDs that have marked themselves as persistent via cache() call.</div> +</td> +</tr> +<tr id="i31" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/Optional.html" title="class in org.apache.spark.api.java">Optional</a><String></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#getSparkHome--">getSparkHome</a></span>()</code> +<div class="block">Get Spark's home location from either a value set through the constructor, + or the 
spark.home Java property, or the SPARK_HOME environment variable + (in that order of preference).</div> +</td> +</tr> +<tr id="i32" class="altColor"> +<td class="colFirst"><code>org.apache.hadoop.conf.Configuration</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopConfiguration--">hadoopConfiguration</a></span>()</code> +<div class="block">Returns the Hadoop configuration used for the Hadoop code (e.g.</div> +</td> +</tr> +<tr id="i33" class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>><br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-">hadoopFile</a></span>(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat</div> +</td> +</tr> +<tr id="i34" class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>><br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-int-">hadoopFile</a></span>(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat.</div> +</td> +</tr> +<tr id="i35" class="rowColor"> +<td class="colFirst"><code><K,V,F extends 
org.apache.hadoop.mapred.InputFormat<K,V>><br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-">hadoopRDD</a></span>(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g.</div> +</td> +</tr> +<tr id="i36" class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>><br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-int-">hadoopRDD</a></span>(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g.</div> +</td> +</tr> +<tr id="i37" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#intAccumulator-int-">intAccumulator</a></span>(int initialValue)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div 
class="block"><span class="deprecationComment">use sc().longAccumulator(). Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i38" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/Accumulator.html" title="class in org.apache.spark">Accumulator</a><Integer></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#intAccumulator-int-java.lang.String-">intAccumulator</a></span>(int initialValue, + String name)</code> +<div class="block"><span class="deprecatedLabel">Deprecated.</span> +<div class="block"><span class="deprecationComment">use sc().longAccumulator(String). Since 2.0.0.</span></div> +</div> +</td> +</tr> +<tr id="i39" class="rowColor"> +<td class="colFirst"><code>Boolean</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#isLocal--">isLocal</a></span>()</code> </td> +</tr> +<tr id="i40" class="altColor"> +<td class="colFirst"><code>static String[]</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#jarOfClass-java.lang.Class-">jarOfClass</a></span>(Class<?> cls)</code> +<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass + their JARs to SparkContext.</div> +</td> +</tr> +<tr id="i41" class="rowColor"> +<td class="colFirst"><code>static String[]</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#jarOfObject-java.lang.Object-">jarOfObject</a></span>(Object obj)</code> +<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users + to pass their JARs to SparkContext.</div> +</td> +</tr> +<tr id="i42" class="altColor"> +<td class="colFirst"><code>java.util.List<String></code></td> 
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#jars--">jars</a></span>()</code> </td> +</tr> +<tr id="i43" class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#master--">master</a></span>()</code> </td> +</tr> +<tr id="i44" class="altColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>><br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#newAPIHadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-org.apache.hadoop.conf.Configuration-">newAPIHadoopFile</a></span>(String path, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass, + org.apache.hadoop.conf.Configuration conf)</code> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat + and extra configuration options to pass to the input format.</div> +</td> +</tr> +<tr id="i45" class="rowColor"> +<td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>><br><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#newAPIHadoopRDD-org.apache.hadoop.conf.Configuration-java.lang.Class-java.lang.Class-java.lang.Class-">newAPIHadoopRDD</a></span>(org.apache.hadoop.conf.Configuration conf, + Class<F> fClass, + Class<K> kClass, + Class<V> vClass)</code> +<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat 
+ and extra configuration options to pass to the input format.</div> +</td> +</tr> +<tr id="i46" class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#objectFile-java.lang.String-">objectFile</a></span>(String path)</code> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition.</div> +</td> +</tr> +<tr id="i47" class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#objectFile-java.lang.String-int-">objectFile</a></span>(String path, + int minPartitions)</code> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition.</div> +</td> +</tr> +<tr id="i48" class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelize-java.util.List-">parallelize</a></span>(java.util.List<T> list)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr id="i49" class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td 
class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelize-java.util.List-int-">parallelize</a></span>(java.util.List<T> list, + int numSlices)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr id="i50" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizeDoubles-java.util.List-">parallelizeDoubles</a></span>(java.util.List<Double> list)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr id="i51" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizeDoubles-java.util.List-int-">parallelizeDoubles</a></span>(java.util.List<Double> list, + int numSlices)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr id="i52" class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizePairs-java.util.List-">parallelizePairs</a></span>(java.util.List<scala.Tuple2<K,V>> list)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr id="i53" class="rowColor"> +<td class="colFirst"><code><K,V> <a 
href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#parallelizePairs-java.util.List-int-">parallelizePairs</a></span>(java.util.List<scala.Tuple2<K,V>> list, + int numSlices)</code> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</td> +</tr> +<tr id="i54" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sc--">sc</a></span>()</code> </td> +</tr> +<tr id="i55" class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-">sequenceFile</a></span>(String path, + Class<K> keyClass, + Class<V> valueClass)</code> +<div class="block">Get an RDD for a Hadoop SequenceFile.</div> +</td> +</tr> +<tr id="i56" class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-int-">sequenceFile</a></span>(String path, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</code> +<div class="block">Get an RDD for a Hadoop SequenceFile with given key 
and value types.</div> +</td> +</tr> +<tr id="i57" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setCallSite-java.lang.String-">setCallSite</a></span>(String site)</code> +<div class="block">Pass-through to SparkContext.setCallSite.</div> +</td> +</tr> +<tr id="i58" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setCheckpointDir-java.lang.String-">setCheckpointDir</a></span>(String dir)</code> +<div class="block">Set the directory under which RDDs are going to be checkpointed.</div> +</td> +</tr> +<tr id="i59" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setJobDescription-java.lang.String-">setJobDescription</a></span>(String value)</code> +<div class="block">Set a human readable description of the current job.</div> +</td> +</tr> +<tr id="i60" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setJobGroup-java.lang.String-java.lang.String-">setJobGroup</a></span>(String groupId, + String description)</code> +<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared.</div> +</td> +</tr> +<tr id="i61" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setJobGroup-java.lang.String-java.lang.String-boolean-">setJobGroup</a></span>(String groupId, + String description, + boolean 
interruptOnCancel)</code> +<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared.</div> +</td> +</tr> +<tr id="i62" class="altColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setLocalProperty-java.lang.String-java.lang.String-">setLocalProperty</a></span>(String key, + String value)</code> +<div class="block">Set a local property that affects jobs submitted from this thread, and all child + threads, such as the Spark fair scheduler pool.</div> +</td> +</tr> +<tr id="i63" class="rowColor"> +<td class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#setLogLevel-java.lang.String-">setLogLevel</a></span>(String logLevel)</code> +<div class="block">Control our logLevel.</div> +</td> +</tr> +<tr id="i64" class="altColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#sparkUser--">sparkUser</a></span>()</code> </td> +</tr> +<tr id="i65" class="rowColor"> +<td class="colFirst"><code>Long</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#startTime--">startTime</a></span>()</code> </td> +</tr> +<tr id="i66" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaSparkStatusTracker.html" title="class in org.apache.spark.api.java">JavaSparkStatusTracker</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#statusTracker--">statusTracker</a></span>()</code> </td> +</tr> +<tr id="i67" class="rowColor"> +<td 
class="colFirst"><code>void</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#stop--">stop</a></span>()</code> +<div class="block">Shut down the SparkContext.</div> +</td> +</tr> +<tr id="i68" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#textFile-java.lang.String-">textFile</a></span>(String path)</code> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +</td> +</tr> +<tr id="i69" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#textFile-java.lang.String-int-">textFile</a></span>(String path, + int minPartitions)</code> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +</td> +</tr> +<tr id="i70" class="altColor"> +<td class="colFirst"><code>static <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#toSparkContext-org.apache.spark.api.java.JavaSparkContext-">toSparkContext</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in 
org.apache.spark.api.java">JavaSparkContext</a> jsc)</code> </td> +</tr> +<tr id="i71" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union-org.apache.spark.api.java.JavaDoubleRDD...-">union</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a>... rdds)</code> </td> +</tr> +<tr id="i72" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union-org.apache.spark.api.java.JavaDoubleRDD-java.util.List-">union</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a>> rest)</code> +<div class="block">Build the union of two or more RDDs.</div> +</td> +</tr> +<tr id="i73" class="rowColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union-org.apache.spark.api.java.JavaPairRDD...-">union</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V>... 
rdds)</code> </td> +</tr> +<tr id="i74" class="altColor"> +<td class="colFirst"><code><K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union-org.apache.spark.api.java.JavaPairRDD-java.util.List-">union</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V>> rest)</code> +<div class="block">Build the union of two or more RDDs.</div> +</td> +</tr> +<tr id="i75" class="rowColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union-org.apache.spark.api.java.JavaRDD...-">union</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T>... 
rdds)</code> </td> +</tr> +<tr id="i76" class="altColor"> +<td class="colFirst"><code><T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#union-org.apache.spark.api.java.JavaRDD-java.util.List-">union</a></span>(<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> first, + java.util.List<<a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T>> rest)</code> +<div class="block">Build the union of two or more RDDs.</div> +</td> +</tr> +<tr id="i77" class="rowColor"> +<td class="colFirst"><code>String</code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#version--">version</a></span>()</code> +<div class="block">The version of Spark on which this application is running.</div> +</td> +</tr> +<tr id="i78" class="altColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String></code></td> +<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#wholeTextFiles-java.lang.String-">wholeTextFiles</a></span>(String path)</code> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI.</div> +</td> +</tr> +<tr id="i79" class="rowColor"> +<td class="colFirst"><code><a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String></code></td> +<td class="colLast"><code><span class="memberNameLink"><a 
href="../../../../../org/apache/spark/api/java/JavaSparkContext.html#wholeTextFiles-java.lang.String-int-">wholeTextFiles</a></span>(String path, + int minPartitions)</code> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI.</div> +</td> +</tr> +</table> +<ul class="blockList"> +<li class="blockList"><a name="methods.inherited.from.class.Object"> +<!-- --> +</a> +<h3>Methods inherited from class Object</h3> +<code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> +</ul> +</li> +</ul> +</li> +</ul> +</div> +<div class="details"> +<ul class="blockList"> +<li class="blockList"> +<!-- ========= CONSTRUCTOR DETAIL ======== --> +<ul class="blockList"> +<li class="blockList"><a name="constructor.detail"> +<!-- --> +</a> +<h3>Constructor Detail</h3> +<a name="JavaSparkContext-org.apache.spark.SparkContext-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</pre> +</li> +</ul> +<a name="JavaSparkContext--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext()</pre> +<div class="block">Create a JavaSparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit).</div> +</li> +</ul> +<a name="JavaSparkContext-org.apache.spark.SparkConf-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(<a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</pre> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>conf</code> - a <a href="../../../../../org/apache/spark/SparkConf.html" title="class in 
org.apache.spark"><code>SparkConf</code></a> object specifying Spark parameters</dd> +</dl> +</li> +</ul> +<a name="JavaSparkContext-java.lang.String-java.lang.String-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName)</pre> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd> +<dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd> +</dl> +</li> +</ul> +<a name="JavaSparkContext-java.lang.String-java.lang.String-org.apache.spark.SparkConf-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + <a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</pre> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd> +<dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd> +<dd><code>conf</code> - a <a href="../../../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><code>SparkConf</code></a> object specifying other Spark parameters</dd> +</dl> +</li> +</ul> +<a name="JavaSparkContext-java.lang.String-java.lang.String-java.lang.String-java.lang.String-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + String sparkHome, + String jarFile)</pre> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>master</code> - Cluster URL to connect to (e.g. 
mesos://host:port, spark://host:port, local[4]).</dd> +<dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd> +<dd><code>sparkHome</code> - The SPARK_HOME directory on the slave nodes</dd> +<dd><code>jarFile</code> - JAR file to send to the cluster. This can be a path on the local file system + or an HDFS, HTTP, HTTPS, or FTP URL.</dd> +</dl> +</li> +</ul> +<a name="JavaSparkContext-java.lang.String-java.lang.String-java.lang.String-java.lang.String:A-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + String sparkHome, + String[] jars)</pre> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd> +<dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd> +<dd><code>sparkHome</code> - The SPARK_HOME directory on the slave nodes</dd> +<dd><code>jars</code> - Collection of JARs to send to the cluster. These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs.</dd> +</dl> +</li> +</ul> +<a name="JavaSparkContext-java.lang.String-java.lang.String-java.lang.String-java.lang.String:A-java.util.Map-"> +<!-- --> +</a> +<ul class="blockListLast"> +<li class="blockList"> +<h4>JavaSparkContext</h4> +<pre>public JavaSparkContext(String master, + String appName, + String sparkHome, + String[] jars, + java.util.Map<String,String> environment)</pre> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd> +<dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd> +<dd><code>sparkHome</code> - The SPARK_HOME directory on the slave nodes</dd> +<dd><code>jars</code> - Collection of JARs to send to the cluster. 
These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs.</dd> +<dd><code>environment</code> - Environment variables to set on worker nodes</dd> +</dl> +</li> +</ul> +</li> +</ul> +<!-- ============ METHOD DETAIL ========== --> +<ul class="blockList"> +<li class="blockList"><a name="method.detail"> +<!-- --> +</a> +<h3>Method Detail</h3> +<a name="fromSparkContext-org.apache.spark.SparkContext-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>fromSparkContext</h4> +<pre>public static <a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a> fromSparkContext(<a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc)</pre> +</li> +</ul> +<a name="toSparkContext-org.apache.spark.api.java.JavaSparkContext-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>toSparkContext</h4> +<pre>public static <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> toSparkContext(<a href="../../../../../org/apache/spark/api/java/JavaSparkContext.html" title="class in org.apache.spark.api.java">JavaSparkContext</a> jsc)</pre> +</li> +</ul> +<a name="jarOfClass-java.lang.Class-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jarOfClass</h4> +<pre>public static String[] jarOfClass(Class<?> cls)</pre> +<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass + their JARs to SparkContext.</div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>cls</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +</dl> +</li> +</ul> +<a name="jarOfObject-java.lang.Object-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jarOfObject</h4> +<pre>public static String[] jarOfObject(Object 
obj)</pre> +<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users + to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in + your driver program.</div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>obj</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +</dl> +</li> +</ul> +<a name="sc--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sc</h4> +<pre>public <a href="../../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sc()</pre> +</li> +</ul> +<a name="statusTracker--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>statusTracker</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaSparkStatusTracker.html" title="class in org.apache.spark.api.java">JavaSparkStatusTracker</a> statusTracker()</pre> +</li> +</ul> +<a name="isLocal--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>isLocal</h4> +<pre>public Boolean isLocal()</pre> +</li> +</ul> +<a name="sparkUser--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sparkUser</h4> +<pre>public String sparkUser()</pre> +</li> +</ul> +<a name="master--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>master</h4> +<pre>public String master()</pre> +</li> +</ul> +<a name="appName--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>appName</h4> +<pre>public String appName()</pre> +</li> +</ul> +<a name="jars--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>jars</h4> +<pre>public java.util.List<String> jars()</pre> +</li> +</ul> +<a name="startTime--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>startTime</h4> +<pre>public Long startTime()</pre> +</li> +</ul> +<a name="version--"> +<!-- --> +</a> +<ul class="blockList"> +<li 
class="blockList"> +<h4>version</h4> +<pre>public String version()</pre> +<div class="block">The version of Spark on which this application is running.</div> +</li> +</ul> +<a name="defaultParallelism--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>defaultParallelism</h4> +<pre>public Integer defaultParallelism()</pre> +<div class="block">Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD).</div> +</li> +</ul> +<a name="defaultMinPartitions--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>defaultMinPartitions</h4> +<pre>public Integer defaultMinPartitions()</pre> +<div class="block">Default min number of partitions for Hadoop RDDs when not given by user</div> +</li> +</ul> +<a name="parallelize-java.util.List-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelize</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> parallelize(java.util.List<T> list, + int numSlices)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="emptyRDD--"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>emptyRDD</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> emptyRDD()</pre> +<div class="block">Get an RDD that has no partitions or elements.</div> +</li> +</ul> +<a name="parallelize-java.util.List-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelize</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> parallelize(java.util.List<T> list)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizePairs-java.util.List-int-"> +<!-- --> 
+</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizePairs</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> parallelizePairs(java.util.List<scala.Tuple2<K,V>> list, + int numSlices)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizePairs-java.util.List-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizePairs</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> parallelizePairs(java.util.List<scala.Tuple2<K,V>> list)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizeDoubles-java.util.List-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizeDoubles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a> parallelizeDoubles(java.util.List<Double> list, + int numSlices)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="parallelizeDoubles-java.util.List-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>parallelizeDoubles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaDoubleRDD.html" title="class in org.apache.spark.api.java">JavaDoubleRDD</a> parallelizeDoubles(java.util.List<Double> list)</pre> +<div class="block">Distribute a local Scala collection to form an RDD.</div> +</li> +</ul> +<a name="textFile-java.lang.String-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>textFile</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String> 
textFile(String path)</pre> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +</dl> +</li> +</ul> +<a name="textFile-java.lang.String-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>textFile</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><String> textFile(String path, + int minPartitions)</pre> +<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI, and return it as an RDD of Strings.</div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +</dl> +</li> +</ul> +<a name="wholeTextFiles-java.lang.String-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>wholeTextFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String> wholeTextFiles(String path, + int minPartitions)</pre> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI. Each file is read as a single record and returned in a + key-value pair, where the key is the path of each file, the value is the content of each file. + <p> + <p> For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... 
+ hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <pre><code> + JavaPairRDD<String, String> rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path") + </code></pre> + <p> + <p> then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... + (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>minPartitions</code> - A suggestion value of the minimal splitting number for input data.</dd> +<dd><code>path</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Small files are preferred; large files are also allowable, but may cause bad performance. + <p></dd> +</dl> +</li> +</ul> +<a name="wholeTextFiles-java.lang.String-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>wholeTextFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,String> wholeTextFiles(String path)</pre> +<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any + Hadoop-supported file system URI. Each file is read as a single record and returned in a + key-value pair, where the key is the path of each file, the value is the content of each file.
+ <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="seeLabel">See Also:</span></dt> +<dd><code>wholeTextFiles(path: String, minPartitions: Int)</code>.</dd> +</dl> +</li> +</ul> +<a name="binaryFiles-java.lang.String-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>> binaryFiles(String path, + int minPartitions)</pre> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array. Each file is read as a single + record and returned in a key-value pair, where the key is the path of each file, + the value is the content of each file. + <p> + For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... + hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <pre><code> + JavaPairRDD<String, byte[]> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path") + </code></pre> + <p> + then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ... 
+ (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>minPartitions</code> - A suggestion value of the minimal splitting number for input data.</dd> +<dd><code>path</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Small files are preferred; very large files may cause bad performance. + <p></dd> +</dl> +</li> +</ul> +<a name="binaryFiles-java.lang.String-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryFiles</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><String,<a href="../../../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>> binaryFiles(String path)</pre> +<div class="block">Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array. Each file is read as a single + record and returned in a key-value pair, where the key is the path of each file, + the value is the content of each file. + <p> + For example, if you have the following files: + <pre><code> + hdfs://a-hdfs-path/part-00000 + hdfs://a-hdfs-path/part-00001 + ... + hdfs://a-hdfs-path/part-nnnnn + </code></pre> + <p> + Do + <pre><code> + JavaPairRDD<String, byte[]> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path") + </code></pre> + <p> + then <code>rdd</code> contains + <pre><code> + (a-hdfs-path/part-00000, its content) + (a-hdfs-path/part-00001, its content) + ...
+ (a-hdfs-path/part-nnnnn, its content) + </code></pre> + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Small files are preferred; very large files may cause bad performance.</dd> +</dl> +</li> +</ul> +<a name="binaryRecords-java.lang.String-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>binaryRecords</h4> +<pre>public <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><byte[]> binaryRecords(String path, + int recordLength)</pre> +<div class="block">Load data from a flat binary file, assuming the length of each record is constant. + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - Directory to the input data files</dd> +<dd><code>recordLength</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>An RDD of data with values, represented as byte arrays</dd> +</dl> +</li> +</ul> +<a name="sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sequenceFile</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> sequenceFile(String path, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.
+ <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dd><code>keyClass</code> - (undocumented)</dd> +<dd><code>valueClass</code> - (undocumented)</dd> +<dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. + If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd> +</dl> +</li> +</ul> +<a name="sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>sequenceFile</h4> +<pre>public <K,V> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> sequenceFile(String path, + Class<K> keyClass, + Class<V> valueClass)</pre> +<div class="block">Get an RDD for a Hadoop SequenceFile. + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dd><code>keyClass</code> - (undocumented)</dd> +<dd><code>valueClass</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd> +</dl> +</li> +</ul> +<a name="objectFile-java.lang.String-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>objectFile</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> objectFile(String path, + int minPartitions)</pre> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition. This is still an experimental storage + format and may not be supported exactly as is in future Spark releases. It will also be pretty + slow if you use the default serializer (Java serialization), though the nice thing about it is + that there's very little effort required to save arbitrary objects.</div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +</dl> +</li> +</ul> +<a name="objectFile-java.lang.String-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>objectFile</h4> +<pre>public <T> <a href="../../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><T> objectFile(String path)</pre> +<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and + BytesWritable values that contain a serialized partition. This is still an experimental storage + format and may not be supported exactly as is in future Spark releases. 
It will also be pretty + slow if you use the default serializer (Java serialization), though the nice thing about it is + that there's very little effort required to save arbitrary objects.</div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +</dl> +</li> +</ul> +<a name="hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopRDD</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopRDD(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable, + etc). + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast. + Therefore if you plan to reuse this conf to create multiple RDDs, you need to make + sure you won't modify the conf. A safe approach is always creating a new conf for + a new RDD.</dd> +<dd><code>inputFormatClass</code> - Class of the InputFormat</dd> +<dd><code>keyClass</code> - Class of the keys</dd> +<dd><code>valueClass</code> - Class of the values</dd> +<dd><code>minPartitions</code> - Minimum number of Hadoop Splits to generate. 
+ <p></dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. + If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd> +</dl> +</li> +</ul> +<a name="hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopRDD</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopRDD(org.apache.hadoop.mapred.JobConf conf, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass)</pre> +<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any + other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable, + etc). + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast. + Therefore if you plan to reuse this conf to create multiple RDDs, you need to make + sure you won't modify the conf.
A safe approach is always creating a new conf for + a new RDD.</dd> +<dd><code>inputFormatClass</code> - Class of the InputFormat</dd> +<dd><code>keyClass</code> - Class of the keys</dd> +<dd><code>valueClass</code> - Class of the values + <p></dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. + If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd> +</dl> +</li> +</ul> +<a name="hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-int-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopFile(String path, + Class<F> inputFormatClass, + Class<K> keyClass, + Class<V> valueClass, + int minPartitions)</pre> +<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat. + <p></div> +<dl> +<dt><span class="paramLabel">Parameters:</span></dt> +<dd><code>path</code> - (undocumented)</dd> +<dd><code>inputFormatClass</code> - (undocumented)</dd> +<dd><code>keyClass</code> - (undocumented)</dd> +<dd><code>valueClass</code> - (undocumented)</dd> +<dd><code>minPartitions</code> - (undocumented)</dd> +<dt><span class="returnLabel">Returns:</span></dt> +<dd>(undocumented)</dd> +<dt><span class="simpleTagLabel">Note:</span></dt> +<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each + record, directly caching the returned RDD will create many references to the same object. 
+ If you plan to directly cache Hadoop writable objects, you should first copy them using + a <code>map</code> function.</dd> +</dl> +</li> +</ul> +<a name="hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-"> +<!-- --> +</a> +<ul class="blockList"> +<li class="blockList"> +<h4>hadoopFile</h4> +<pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../../../org/apache/spark/api/java/JavaPairRDD.html" title="class in org.apache.spark.api.java">JavaPairRDD</a><K,V> hadoopFile(String path, +
<TRUNCATED> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org