Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/selectExpr.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/selectExpr.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/selectExpr.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,120 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: SelectExpr</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for selectExpr {SparkR}"><tr><td>selectExpr {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>SelectExpr</h2> + +<h3>Description</h3> + +<p>Select from a SparkDataFrame using a set of SQL expressions. +</p> + + +<h3>Usage</h3> + +<pre> +selectExpr(x, expr, ...) + +## S4 method for signature 'SparkDataFrame,character' +selectExpr(x, expr, ...) 
+</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>x</code></td> +<td> +<p>A SparkDataFrame to be selected from.</p> +</td></tr> +<tr valign="top"><td><code>expr</code></td> +<td> +<p>A string containing a SQL expression</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>Additional expressions</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p>A SparkDataFrame +</p> + + +<h3>Note</h3> + +<p>selectExpr since 1.4.0 +</p> + + +<h3>See Also</h3> + +<p>Other SparkDataFrame functions: <code><a href="SparkDataFrame.html">SparkDataFrame-class</a></code>, +<code><a href="summarize.html">agg</a></code>, <code><a href="alias.html">alias</a></code>, +<code><a href="arrange.html">arrange</a></code>, <code><a href="as.data.frame.html">as.data.frame</a></code>, +<code><a href="attach.html">attach,SparkDataFrame-method</a></code>, +<code><a href="broadcast.html">broadcast</a></code>, <code><a href="cache.html">cache</a></code>, +<code><a href="checkpoint.html">checkpoint</a></code>, <code><a href="coalesce.html">coalesce</a></code>, +<code><a href="collect.html">collect</a></code>, <code><a href="columns.html">colnames</a></code>, +<code><a href="coltypes.html">coltypes</a></code>, +<code><a href="createOrReplaceTempView.html">createOrReplaceTempView</a></code>, +<code><a href="crossJoin.html">crossJoin</a></code>, <code><a href="cube.html">cube</a></code>, +<code><a href="dapplyCollect.html">dapplyCollect</a></code>, <code><a href="dapply.html">dapply</a></code>, +<code><a href="describe.html">describe</a></code>, <code><a href="dim.html">dim</a></code>, +<code><a href="distinct.html">distinct</a></code>, <code><a href="dropDuplicates.html">dropDuplicates</a></code>, +<code><a href="nafunctions.html">dropna</a></code>, <code><a href="drop.html">drop</a></code>, +<code><a href="dtypes.html">dtypes</a></code>, <code><a href="except.html">except</a></code>, +<code><a href="explain.html">explain</a></code>, <code><a 
href="filter.html">filter</a></code>, +<code><a href="first.html">first</a></code>, <code><a href="gapplyCollect.html">gapplyCollect</a></code>, +<code><a href="gapply.html">gapply</a></code>, <code><a href="getNumPartitions.html">getNumPartitions</a></code>, +<code><a href="groupBy.html">group_by</a></code>, <code><a href="head.html">head</a></code>, +<code><a href="hint.html">hint</a></code>, <code><a href="histogram.html">histogram</a></code>, +<code><a href="insertInto.html">insertInto</a></code>, <code><a href="intersect.html">intersect</a></code>, +<code><a href="isLocal.html">isLocal</a></code>, <code><a href="isStreaming.html">isStreaming</a></code>, +<code><a href="join.html">join</a></code>, <code><a href="limit.html">limit</a></code>, +<code><a href="localCheckpoint.html">localCheckpoint</a></code>, <code><a href="merge.html">merge</a></code>, +<code><a href="mutate.html">mutate</a></code>, <code><a href="ncol.html">ncol</a></code>, +<code><a href="nrow.html">nrow</a></code>, <code><a href="persist.html">persist</a></code>, +<code><a href="printSchema.html">printSchema</a></code>, <code><a href="randomSplit.html">randomSplit</a></code>, +<code><a href="rbind.html">rbind</a></code>, <code><a href="registerTempTable-deprecated.html">registerTempTable</a></code>, +<code><a href="rename.html">rename</a></code>, <code><a href="repartition.html">repartition</a></code>, +<code><a href="rollup.html">rollup</a></code>, <code><a href="sample.html">sample</a></code>, +<code><a href="saveAsTable.html">saveAsTable</a></code>, <code><a href="schema.html">schema</a></code>, +<code><a href="select.html">select</a></code>, <code><a href="showDF.html">showDF</a></code>, +<code><a href="show.html">show</a></code>, <code><a href="storageLevel.html">storageLevel</a></code>, +<code><a href="str.html">str</a></code>, <code><a href="subset.html">subset</a></code>, +<code><a href="summary.html">summary</a></code>, <code><a href="take.html">take</a></code>, +<code><a 
href="toJSON.html">toJSON</a></code>, <code><a href="unionByName.html">unionByName</a></code>, +<code><a href="union.html">union</a></code>, <code><a href="unpersist.html">unpersist</a></code>, +<code><a href="withColumn.html">withColumn</a></code>, <code><a href="withWatermark.html">withWatermark</a></code>, +<code><a href="with.html">with</a></code>, <code><a href="write.df.html">write.df</a></code>, +<code><a href="write.jdbc.html">write.jdbc</a></code>, <code><a href="write.json.html">write.json</a></code>, +<code><a href="write.orc.html">write.orc</a></code>, <code><a href="write.parquet.html">write.parquet</a></code>, +<code><a href="write.stream.html">write.stream</a></code>, <code><a href="write.text.html">write.text</a></code> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D path <- "path/to/file.json" +##D df <- read.json(path) +##D selectExpr(df, "col1", "(col2 * 5) as newCol") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html>
Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/setCheckpointDir.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/setCheckpointDir.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/setCheckpointDir.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,60 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Set checkpoint directory</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for setCheckpointDir {SparkR}"><tr><td>setCheckpointDir {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Set checkpoint directory</h2> + +<h3>Description</h3> + +<p>Set the directory under which SparkDataFrames are going to be checkpointed. The directory must be +an HDFS path if running on a cluster. 
+</p> + + +<h3>Usage</h3> + +<pre> +setCheckpointDir(directory) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>directory</code></td> +<td> +<p>Directory path to checkpoint to</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>setCheckpointDir since 2.2.0 +</p> + + +<h3>See Also</h3> + +<p><a href="checkpoint.html">checkpoint</a> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D setCheckpointDir("/checkpoint") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/setCurrentDatabase.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/setCurrentDatabase.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/setCurrentDatabase.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,54 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Sets the current default database</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for setCurrentDatabase {SparkR}"><tr><td>setCurrentDatabase {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Sets the current default database</h2> + +<h3>Description</h3> + +<p>Sets the current default database. 
+</p> + + +<h3>Usage</h3> + +<pre> +setCurrentDatabase(databaseName) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>databaseName</code></td> +<td> +<p>name of the database</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>setCurrentDatabase since 2.2.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D setCurrentDatabase("default") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/setJobDescription.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/setJobDescription.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/setJobDescription.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,53 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Set a human readable description of the current job.</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for setJobDescription {SparkR}"><tr><td>setJobDescription {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Set a human readable description of the current job.</h2> + +<h3>Description</h3> + +<p>Set a description that is shown as a job description in UI. 
+</p> + + +<h3>Usage</h3> + +<pre> +setJobDescription(value) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>value</code></td> +<td> +<p>The job description of the current job.</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>setJobDescription since 2.3.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D setJobDescription("This is an example job.") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/setJobGroup.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/setJobGroup.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/setJobGroup.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,65 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Assigns a group ID to all the jobs started by this thread...</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for setJobGroup {SparkR}"><tr><td>setJobGroup {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Assigns a group ID to all the jobs started by this thread until the group ID is set to a +different value or cleared.</h2> + +<h3>Description</h3> + +<p>Assigns a group ID to all the jobs started 
by this thread until the group ID is set to a +different value or cleared. +</p> + + +<h3>Usage</h3> + +<pre> +## Default S3 method: +setJobGroup(groupId, description, interruptOnCancel) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>groupId</code></td> +<td> +<p>the ID to be assigned to job groups.</p> +</td></tr> +<tr valign="top"><td><code>description</code></td> +<td> +<p>description for the job group ID.</p> +</td></tr> +<tr valign="top"><td><code>interruptOnCancel</code></td> +<td> +<p>flag to indicate if the job is interrupted on job cancellation.</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>setJobGroup since 1.5.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D setJobGroup("myJobGroup", "My job group description", TRUE) +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/setLocalProperty.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/setLocalProperty.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/setLocalProperty.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,59 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Set a local property that affects jobs submitted from this...</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script 
src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for setLocalProperty {SparkR}"><tr><td>setLocalProperty {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Set a local property that affects jobs submitted from this thread, such as the +Spark fair scheduler pool.</h2> + +<h3>Description</h3> + +<p>Set a local property that affects jobs submitted from this thread, such as the +Spark fair scheduler pool. +</p> + + +<h3>Usage</h3> + +<pre> +setLocalProperty(key, value) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>key</code></td> +<td> +<p>The key for a local property.</p> +</td></tr> +<tr valign="top"><td><code>value</code></td> +<td> +<p>The value for a local property.</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>setLocalProperty since 2.3.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D setLocalProperty("spark.scheduler.pool", "poolA") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/setLogLevel.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/setLogLevel.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/setLogLevel.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,53 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Set new log level</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" 
href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for setLogLevel {SparkR}"><tr><td>setLogLevel {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Set new log level</h2> + +<h3>Description</h3> + +<p>Set new log level: "ALL", "DEBUG", "ERROR", "FATAL", "INFO", "OFF", "TRACE", "WARN" +</p> + + +<h3>Usage</h3> + +<pre> +setLogLevel(level) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>level</code></td> +<td> +<p>New log level</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>setLogLevel since 2.0.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D setLogLevel("ERROR") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/show.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/show.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/show.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,124 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: show</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script 
src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for show {SparkR}"><tr><td>show {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>show</h2> + +<h3>Description</h3> + +<p>Print class and type information of a Spark object. +</p> + + +<h3>Usage</h3> + +<pre> +## S4 method for signature 'Column' +show(object) + +## S4 method for signature 'GroupedData' +show(object) + +## S4 method for signature 'SparkDataFrame' +show(object) + +## S4 method for signature 'WindowSpec' +show(object) + +## S4 method for signature 'StreamingQuery' +show(object) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>object</code></td> +<td> +<p>a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec, or StreamingQuery.</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>show(Column) since 1.4.0 +</p> +<p>show(GroupedData) since 1.4.0 +</p> +<p>show(SparkDataFrame) since 1.4.0 +</p> +<p>show(WindowSpec) since 2.0.0 +</p> +<p>show(StreamingQuery) since 2.2.0 +</p> + + +<h3>See Also</h3> + +<p>Other SparkDataFrame functions: <code><a href="SparkDataFrame.html">SparkDataFrame-class</a></code>, +<code><a href="summarize.html">agg</a></code>, <code><a href="alias.html">alias</a></code>, +<code><a href="arrange.html">arrange</a></code>, <code><a href="as.data.frame.html">as.data.frame</a></code>, +<code><a href="attach.html">attach,SparkDataFrame-method</a></code>, +<code><a href="broadcast.html">broadcast</a></code>, <code><a href="cache.html">cache</a></code>, +<code><a href="checkpoint.html">checkpoint</a></code>, <code><a href="coalesce.html">coalesce</a></code>, +<code><a href="collect.html">collect</a></code>, <code><a href="columns.html">colnames</a></code>, +<code><a href="coltypes.html">coltypes</a></code>, +<code><a 
href="createOrReplaceTempView.html">createOrReplaceTempView</a></code>, +<code><a href="crossJoin.html">crossJoin</a></code>, <code><a href="cube.html">cube</a></code>, +<code><a href="dapplyCollect.html">dapplyCollect</a></code>, <code><a href="dapply.html">dapply</a></code>, +<code><a href="describe.html">describe</a></code>, <code><a href="dim.html">dim</a></code>, +<code><a href="distinct.html">distinct</a></code>, <code><a href="dropDuplicates.html">dropDuplicates</a></code>, +<code><a href="nafunctions.html">dropna</a></code>, <code><a href="drop.html">drop</a></code>, +<code><a href="dtypes.html">dtypes</a></code>, <code><a href="except.html">except</a></code>, +<code><a href="explain.html">explain</a></code>, <code><a href="filter.html">filter</a></code>, +<code><a href="first.html">first</a></code>, <code><a href="gapplyCollect.html">gapplyCollect</a></code>, +<code><a href="gapply.html">gapply</a></code>, <code><a href="getNumPartitions.html">getNumPartitions</a></code>, +<code><a href="groupBy.html">group_by</a></code>, <code><a href="head.html">head</a></code>, +<code><a href="hint.html">hint</a></code>, <code><a href="histogram.html">histogram</a></code>, +<code><a href="insertInto.html">insertInto</a></code>, <code><a href="intersect.html">intersect</a></code>, +<code><a href="isLocal.html">isLocal</a></code>, <code><a href="isStreaming.html">isStreaming</a></code>, +<code><a href="join.html">join</a></code>, <code><a href="limit.html">limit</a></code>, +<code><a href="localCheckpoint.html">localCheckpoint</a></code>, <code><a href="merge.html">merge</a></code>, +<code><a href="mutate.html">mutate</a></code>, <code><a href="ncol.html">ncol</a></code>, +<code><a href="nrow.html">nrow</a></code>, <code><a href="persist.html">persist</a></code>, +<code><a href="printSchema.html">printSchema</a></code>, <code><a href="randomSplit.html">randomSplit</a></code>, +<code><a href="rbind.html">rbind</a></code>, <code><a 
href="registerTempTable-deprecated.html">registerTempTable</a></code>, +<code><a href="rename.html">rename</a></code>, <code><a href="repartition.html">repartition</a></code>, +<code><a href="rollup.html">rollup</a></code>, <code><a href="sample.html">sample</a></code>, +<code><a href="saveAsTable.html">saveAsTable</a></code>, <code><a href="schema.html">schema</a></code>, +<code><a href="selectExpr.html">selectExpr</a></code>, <code><a href="select.html">select</a></code>, +<code><a href="showDF.html">showDF</a></code>, <code><a href="storageLevel.html">storageLevel</a></code>, +<code><a href="str.html">str</a></code>, <code><a href="subset.html">subset</a></code>, +<code><a href="summary.html">summary</a></code>, <code><a href="take.html">take</a></code>, +<code><a href="toJSON.html">toJSON</a></code>, <code><a href="unionByName.html">unionByName</a></code>, +<code><a href="union.html">union</a></code>, <code><a href="unpersist.html">unpersist</a></code>, +<code><a href="withColumn.html">withColumn</a></code>, <code><a href="withWatermark.html">withWatermark</a></code>, +<code><a href="with.html">with</a></code>, <code><a href="write.df.html">write.df</a></code>, +<code><a href="write.jdbc.html">write.jdbc</a></code>, <code><a href="write.json.html">write.json</a></code>, +<code><a href="write.orc.html">write.orc</a></code>, <code><a href="write.parquet.html">write.parquet</a></code>, +<code><a href="write.stream.html">write.stream</a></code>, <code><a href="write.text.html">write.text</a></code> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D path <- "path/to/file.json" +##D df <- read.json(path) +##D show(df) +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/showDF.html ============================================================================== --- 
dev/spark/v2.3.0-rc1-docs/_site/api/R/showDF.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/showDF.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,126 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: showDF</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for showDF {SparkR}"><tr><td>showDF {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>showDF</h2> + +<h3>Description</h3> + +<p>Print the first numRows rows of a SparkDataFrame +</p> + + +<h3>Usage</h3> + +<pre> +showDF(x, ...) + +## S4 method for signature 'SparkDataFrame' +showDF(x, numRows = 20, truncate = TRUE, + vertical = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>x</code></td> +<td> +<p>a SparkDataFrame.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>further arguments to be passed to or from other methods.</p> +</td></tr> +<tr valign="top"><td><code>numRows</code></td> +<td> +<p>the number of rows to print. Defaults to 20.</p> +</td></tr> +<tr valign="top"><td><code>truncate</code></td> +<td> +<p>whether to truncate long strings. If <code>TRUE</code>, strings longer than +20 characters will be truncated. 
However, if set to a number greater than zero, +truncates strings longer than <code>truncate</code> characters and all cells +will be aligned right.</p> +</td></tr> +<tr valign="top"><td><code>vertical</code></td> +<td> +<p>whether to print output rows vertically (one line per column value).</p> +</td></tr> +</table> + + +<h3>Note</h3> + +<p>showDF since 1.4.0 +</p> + + +<h3>See Also</h3> + +<p>Other SparkDataFrame functions: <code><a href="SparkDataFrame.html">SparkDataFrame-class</a></code>, +<code><a href="summarize.html">agg</a></code>, <code><a href="alias.html">alias</a></code>, +<code><a href="arrange.html">arrange</a></code>, <code><a href="as.data.frame.html">as.data.frame</a></code>, +<code><a href="attach.html">attach,SparkDataFrame-method</a></code>, +<code><a href="broadcast.html">broadcast</a></code>, <code><a href="cache.html">cache</a></code>, +<code><a href="checkpoint.html">checkpoint</a></code>, <code><a href="coalesce.html">coalesce</a></code>, +<code><a href="collect.html">collect</a></code>, <code><a href="columns.html">colnames</a></code>, +<code><a href="coltypes.html">coltypes</a></code>, +<code><a href="createOrReplaceTempView.html">createOrReplaceTempView</a></code>, +<code><a href="crossJoin.html">crossJoin</a></code>, <code><a href="cube.html">cube</a></code>, +<code><a href="dapplyCollect.html">dapplyCollect</a></code>, <code><a href="dapply.html">dapply</a></code>, +<code><a href="describe.html">describe</a></code>, <code><a href="dim.html">dim</a></code>, +<code><a href="distinct.html">distinct</a></code>, <code><a href="dropDuplicates.html">dropDuplicates</a></code>, +<code><a href="nafunctions.html">dropna</a></code>, <code><a href="drop.html">drop</a></code>, +<code><a href="dtypes.html">dtypes</a></code>, <code><a href="except.html">except</a></code>, +<code><a href="explain.html">explain</a></code>, <code><a href="filter.html">filter</a></code>, +<code><a href="first.html">first</a></code>, <code><a 
href="gapplyCollect.html">gapplyCollect</a></code>, +<code><a href="gapply.html">gapply</a></code>, <code><a href="getNumPartitions.html">getNumPartitions</a></code>, +<code><a href="groupBy.html">group_by</a></code>, <code><a href="head.html">head</a></code>, +<code><a href="hint.html">hint</a></code>, <code><a href="histogram.html">histogram</a></code>, +<code><a href="insertInto.html">insertInto</a></code>, <code><a href="intersect.html">intersect</a></code>, +<code><a href="isLocal.html">isLocal</a></code>, <code><a href="isStreaming.html">isStreaming</a></code>, +<code><a href="join.html">join</a></code>, <code><a href="limit.html">limit</a></code>, +<code><a href="localCheckpoint.html">localCheckpoint</a></code>, <code><a href="merge.html">merge</a></code>, +<code><a href="mutate.html">mutate</a></code>, <code><a href="ncol.html">ncol</a></code>, +<code><a href="nrow.html">nrow</a></code>, <code><a href="persist.html">persist</a></code>, +<code><a href="printSchema.html">printSchema</a></code>, <code><a href="randomSplit.html">randomSplit</a></code>, +<code><a href="rbind.html">rbind</a></code>, <code><a href="registerTempTable-deprecated.html">registerTempTable</a></code>, +<code><a href="rename.html">rename</a></code>, <code><a href="repartition.html">repartition</a></code>, +<code><a href="rollup.html">rollup</a></code>, <code><a href="sample.html">sample</a></code>, +<code><a href="saveAsTable.html">saveAsTable</a></code>, <code><a href="schema.html">schema</a></code>, +<code><a href="selectExpr.html">selectExpr</a></code>, <code><a href="select.html">select</a></code>, +<code><a href="show.html">show</a></code>, <code><a href="storageLevel.html">storageLevel</a></code>, +<code><a href="str.html">str</a></code>, <code><a href="subset.html">subset</a></code>, +<code><a href="summary.html">summary</a></code>, <code><a href="take.html">take</a></code>, +<code><a href="toJSON.html">toJSON</a></code>, <code><a href="unionByName.html">unionByName</a></code>, 
+<code><a href="union.html">union</a></code>, <code><a href="unpersist.html">unpersist</a></code>, +<code><a href="withColumn.html">withColumn</a></code>, <code><a href="withWatermark.html">withWatermark</a></code>, +<code><a href="with.html">with</a></code>, <code><a href="write.df.html">write.df</a></code>, +<code><a href="write.jdbc.html">write.jdbc</a></code>, <code><a href="write.json.html">write.json</a></code>, +<code><a href="write.orc.html">write.orc</a></code>, <code><a href="write.parquet.html">write.parquet</a></code>, +<code><a href="write.stream.html">write.stream</a></code>, <code><a href="write.text.html">write.text</a></code> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D path <- "path/to/file.json" +##D df <- read.json(path) +##D showDF(df) +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.addFile.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.addFile.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.addFile.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,67 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Add a file or directory to be downloaded with this Spark job...</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> 
+<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.addFile {SparkR}"><tr><td>spark.addFile {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Add a file or directory to be downloaded with this Spark job on every node.</h2> + +<h3>Description</h3> + +<p>The path passed can be either a local file, a file in HDFS (or other Hadoop-supported +filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, +use spark.getSparkFiles(fileName) to find its download location. +</p> + + +<h3>Usage</h3> + +<pre> +spark.addFile(path, recursive = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>path</code></td> +<td> +<p>The path of the file to be added</p> +</td></tr> +<tr valign="top"><td><code>recursive</code></td> +<td> +<p>Whether to add files recursively from the path. Default is FALSE.</p> +</td></tr> +</table> + + +<h3>Details</h3> + +<p>A directory can be given if the recursive option is set to true. +Currently directories are only supported for Hadoop-supported filesystems. +Refer to Hadoop-supported filesystems at <a href="https://wiki.apache.org/hadoop/HCFS">https://wiki.apache.org/hadoop/HCFS</a>. 
+</p> + + +<h3>Note</h3> + +<p>spark.addFile since 2.1.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D spark.addFile("~/myfile") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.als.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.als.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.als.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,204 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Alternating Least Squares (ALS) for Collaborative Filtering</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.als {SparkR}"><tr><td>spark.als {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Alternating Least Squares (ALS) for Collaborative Filtering</h2> + +<h3>Description</h3> + +<p><code>spark.als</code> learns latent factors in collaborative filtering via alternating least +squares. Users can call <code>summary</code> to obtain fitted latent factors, <code>predict</code> +to make predictions on new data, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models. +</p> + + +<h3>Usage</h3> + +<pre> +spark.als(data, ...) 
+ +## S4 method for signature 'SparkDataFrame' +spark.als(data, ratingCol = "rating", + userCol = "user", itemCol = "item", rank = 10, regParam = 0.1, + maxIter = 10, nonnegative = FALSE, implicitPrefs = FALSE, alpha = 1, + numUserBlocks = 10, numItemBlocks = 10, checkpointInterval = 10, + seed = 0) + +## S4 method for signature 'ALSModel' +summary(object) + +## S4 method for signature 'ALSModel' +predict(object, newData) + +## S4 method for signature 'ALSModel,character' +write.ml(object, path, overwrite = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>data</code></td> +<td> +<p>a SparkDataFrame for training.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>additional argument(s) passed to the method.</p> +</td></tr> +<tr valign="top"><td><code>ratingCol</code></td> +<td> +<p>column name for ratings.</p> +</td></tr> +<tr valign="top"><td><code>userCol</code></td> +<td> +<p>column name for user ids. Ids must be (or can be coerced into) integers.</p> +</td></tr> +<tr valign="top"><td><code>itemCol</code></td> +<td> +<p>column name for item ids. 
Ids must be (or can be coerced into) integers.</p> +</td></tr> +<tr valign="top"><td><code>rank</code></td> +<td> +<p>rank of the matrix factorization (> 0).</p> +</td></tr> +<tr valign="top"><td><code>regParam</code></td> +<td> +<p>regularization parameter (>= 0).</p> +</td></tr> +<tr valign="top"><td><code>maxIter</code></td> +<td> +<p>maximum number of iterations (>= 0).</p> +</td></tr> +<tr valign="top"><td><code>nonnegative</code></td> +<td> +<p>logical value indicating whether to apply nonnegativity constraints.</p> +</td></tr> +<tr valign="top"><td><code>implicitPrefs</code></td> +<td> +<p>logical value indicating whether to use implicit preference.</p> +</td></tr> +<tr valign="top"><td><code>alpha</code></td> +<td> +<p>alpha parameter in the implicit preference formulation (>= 0).</p> +</td></tr> +<tr valign="top"><td><code>numUserBlocks</code></td> +<td> +<p>number of user blocks used to parallelize computation (> 0).</p> +</td></tr> +<tr valign="top"><td><code>numItemBlocks</code></td> +<td> +<p>number of item blocks used to parallelize computation (> 0).</p> +</td></tr> +<tr valign="top"><td><code>checkpointInterval</code></td> +<td> +<p>number of checkpoint intervals (>= 1) or disable checkpoint (-1). +Note: this setting will be ignored if the checkpoint directory is not +set.</p> +</td></tr> +<tr valign="top"><td><code>seed</code></td> +<td> +<p>integer seed for random number generation.</p> +</td></tr> +<tr valign="top"><td><code>object</code></td> +<td> +<p>a fitted ALS model.</p> +</td></tr> +<tr valign="top"><td><code>newData</code></td> +<td> +<p>a SparkDataFrame for testing.</p> +</td></tr> +<tr valign="top"><td><code>path</code></td> +<td> +<p>the directory where the model is saved.</p> +</td></tr> +<tr valign="top"><td><code>overwrite</code></td> +<td> +<p>logical value indicating whether to overwrite if the output path +already exists. 
Default is FALSE which means throw exception +if the output path exists.</p> +</td></tr> +</table> + + +<h3>Details</h3> + +<p>For more details, see +<a href="http://spark.apache.org/docs/latest/ml-collaborative-filtering.html">MLlib: +Collaborative Filtering</a>. +</p> + + +<h3>Value</h3> + +<p><code>spark.als</code> returns a fitted ALS model. +</p> +<p><code>summary</code> returns summary information of the fitted model, which is a list. +The list includes <code>user</code> (the names of the user column), +<code>item</code> (the item column), <code>rating</code> (the rating column), <code>userFactors</code> +(the estimated user factors), <code>itemFactors</code> (the estimated item factors), +and <code>rank</code> (rank of the matrix factorization model). +</p> +<p><code>predict</code> returns a SparkDataFrame containing predicted values. +</p> + + +<h3>Note</h3> + +<p>spark.als since 2.1.0 +</p> +<p>summary(ALSModel) since 2.1.0 +</p> +<p>predict(ALSModel) since 2.1.0 +</p> +<p>write.ml(ALSModel, character) since 2.1.0 +</p> + + +<h3>See Also</h3> + +<p><a href="read.ml.html">read.ml</a> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D ratings <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 4.0), +##D list(2, 1, 1.0), list(2, 2, 5.0)) +##D df <- createDataFrame(ratings, c("user", "item", "rating")) +##D model <- spark.als(df, "rating", "user", "item") +##D +##D # extract latent factors +##D stats <- summary(model) +##D userFactors <- stats$userFactors +##D itemFactors <- stats$itemFactors +##D +##D # make predictions +##D predicted <- predict(model, df) +##D showDF(predicted) +##D +##D # save and load the model +##D path <- "path/to/model" +##D write.ml(model, path) +##D savedModel <- read.ml(path) +##D summary(savedModel) +##D +##D # set other arguments +##D modelS <- spark.als(df, "rating", "user", "item", rank = 20, +##D regParam = 0.1, nonnegative = TRUE) +##D statsS <- summary(modelS) +## End(Not run) 
+</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.bisectingKmeans.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.bisectingKmeans.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.bisectingKmeans.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,179 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Bisecting K-Means Clustering Model</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.bisectingKmeans {SparkR}"><tr><td>spark.bisectingKmeans {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Bisecting K-Means Clustering Model</h2> + +<h3>Description</h3> + +<p>Fits a bisecting k-means clustering model against a SparkDataFrame. +Users can call <code>summary</code> to print a summary of the fitted model, <code>predict</code> to make +predictions on new data, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models. +</p> +<p>Get fitted result from a bisecting k-means model. +Note: A saved-loaded model does not support this method. +</p> + + +<h3>Usage</h3> + +<pre> +spark.bisectingKmeans(data, formula, ...) 
+ +## S4 method for signature 'SparkDataFrame,formula' +spark.bisectingKmeans(data, formula, k = 4, + maxIter = 20, seed = NULL, minDivisibleClusterSize = 1) + +## S4 method for signature 'BisectingKMeansModel' +summary(object) + +## S4 method for signature 'BisectingKMeansModel' +predict(object, newData) + +## S4 method for signature 'BisectingKMeansModel' +fitted(object, method = c("centers", + "classes")) + +## S4 method for signature 'BisectingKMeansModel,character' +write.ml(object, path, + overwrite = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>data</code></td> +<td> +<p>a SparkDataFrame for training.</p> +</td></tr> +<tr valign="top"><td><code>formula</code></td> +<td> +<p>a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', '.', ':', '+', and '-'. +Note that the response variable of formula is empty in spark.bisectingKmeans.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>additional argument(s) passed to the method.</p> +</td></tr> +<tr valign="top"><td><code>k</code></td> +<td> +<p>the desired number of leaf clusters. Must be > 1. +The actual number could be smaller if there are no divisible leaf clusters.</p> +</td></tr> +<tr valign="top"><td><code>maxIter</code></td> +<td> +<p>maximum iteration number.</p> +</td></tr> +<tr valign="top"><td><code>seed</code></td> +<td> +<p>the random seed.</p> +</td></tr> +<tr valign="top"><td><code>minDivisibleClusterSize</code></td> +<td> +<p>The minimum number of points (if greater than or equal to 1.0) +or the minimum proportion of points (if less than 1.0) of a +divisible cluster. Note that it is an expert parameter. 
The +default value should be good enough for most cases.</p> +</td></tr> +<tr valign="top"><td><code>object</code></td> +<td> +<p>a fitted bisecting k-means model.</p> +</td></tr> +<tr valign="top"><td><code>newData</code></td> +<td> +<p>a SparkDataFrame for testing.</p> +</td></tr> +<tr valign="top"><td><code>method</code></td> +<td> +<p>type of fitted results, <code>"centers"</code> for cluster centers +or <code>"classes"</code> for assigned classes.</p> +</td></tr> +<tr valign="top"><td><code>path</code></td> +<td> +<p>the directory where the model is saved.</p> +</td></tr> +<tr valign="top"><td><code>overwrite</code></td> +<td> +<p>overwrites or not if the output path already exists. Default is FALSE +which means throw exception if the output path exists.</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p><code>spark.bisectingKmeans</code> returns a fitted bisecting k-means model. +</p> +<p><code>summary</code> returns summary information of the fitted model, which is a list. +The list includes the model's <code>k</code> (number of cluster centers), +<code>coefficients</code> (model cluster centers), +<code>size</code> (number of data points in each cluster), <code>cluster</code> +(cluster centers of the transformed data; cluster is NULL if is.loaded is TRUE), +and <code>is.loaded</code> (whether the model is loaded from a saved file). +</p> +<p><code>predict</code> returns the predicted values based on a bisecting k-means model. +</p> +<p><code>fitted</code> returns a SparkDataFrame containing fitted values. 
+</p> + + +<h3>Note</h3> + +<p>spark.bisectingKmeans since 2.2.0 +</p> +<p>summary(BisectingKMeansModel) since 2.2.0 +</p> +<p>predict(BisectingKMeansModel) since 2.2.0 +</p> +<p>fitted since 2.2.0 +</p> +<p>write.ml(BisectingKMeansModel, character) since 2.2.0 +</p> + + +<h3>See Also</h3> + +<p><a href="predict.html">predict</a>, <a href="read.ml.html">read.ml</a>, <a href="write.ml.html">write.ml</a> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D t <- as.data.frame(Titanic) +##D df <- createDataFrame(t) +##D model <- spark.bisectingKmeans(df, Class ~ Survived, k = 4) +##D summary(model) +##D +##D # get fitted result from a bisecting k-means model +##D fitted.model <- fitted(model, "centers") +##D showDF(fitted.model) +##D +##D # fitted values on training data +##D fitted <- predict(model, df) +##D head(select(fitted, "Class", "prediction")) +##D +##D # save fitted model to input path +##D path <- "path/to/model" +##D write.ml(model, path) +##D +##D # can also read back the saved model and print +##D savedModel <- read.ml(path) +##D summary(savedModel) +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.decisionTree.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.decisionTree.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.decisionTree.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,233 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Decision Tree Model for Regression and Classification</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link 
rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.decisionTree {SparkR}"><tr><td>spark.decisionTree {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Decision Tree Model for Regression and Classification</h2> + +<h3>Description</h3> + +<p><code>spark.decisionTree</code> fits a Decision Tree Regression model or Classification model on +a SparkDataFrame. Users can call <code>summary</code> to get a summary of the fitted Decision Tree +model, <code>predict</code> to make predictions on new data, and <code>write.ml</code>/<code>read.ml</code> to +save/load fitted models. +For more details, see +<a href="http://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-regression"> +Decision Tree Regression</a> and +<a href="http://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-classifier"> +Decision Tree Classification</a> +</p> + + +<h3>Usage</h3> + +<pre> +spark.decisionTree(data, formula, ...) + +## S4 method for signature 'SparkDataFrame,formula' +spark.decisionTree(data, formula, + type = c("regression", "classification"), maxDepth = 5, maxBins = 32, + impurity = NULL, seed = NULL, minInstancesPerNode = 1, + minInfoGain = 0, checkpointInterval = 10, maxMemoryInMB = 256, + cacheNodeIds = FALSE, handleInvalid = c("error", "keep", "skip")) + +## S4 method for signature 'DecisionTreeRegressionModel' +summary(object) + +## S3 method for class 'summary.DecisionTreeRegressionModel' +print(x, ...) 
+ +## S4 method for signature 'DecisionTreeClassificationModel' +summary(object) + +## S3 method for class 'summary.DecisionTreeClassificationModel' +print(x, ...) + +## S4 method for signature 'DecisionTreeRegressionModel' +predict(object, newData) + +## S4 method for signature 'DecisionTreeClassificationModel' +predict(object, newData) + +## S4 method for signature 'DecisionTreeRegressionModel,character' +write.ml(object, path, + overwrite = FALSE) + +## S4 method for signature 'DecisionTreeClassificationModel,character' +write.ml(object, path, + overwrite = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>data</code></td> +<td> +<p>a SparkDataFrame for training.</p> +</td></tr> +<tr valign="top"><td><code>formula</code></td> +<td> +<p>a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', ':', '+', and '-'.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>additional arguments passed to the method.</p> +</td></tr> +<tr valign="top"><td><code>type</code></td> +<td> +<p>type of model, one of "regression" or "classification", to fit</p> +</td></tr> +<tr valign="top"><td><code>maxDepth</code></td> +<td> +<p>Maximum depth of the tree (>= 0).</p> +</td></tr> +<tr valign="top"><td><code>maxBins</code></td> +<td> +<p>Maximum number of bins used for discretizing continuous features and for choosing +how to split on features at each node. More bins give higher granularity. Must be +>= 2 and >= number of categories in any categorical feature.</p> +</td></tr> +<tr valign="top"><td><code>impurity</code></td> +<td> +<p>Criterion used for information gain calculation. +For regression, must be "variance". 
For classification, must be one of +"entropy" and "gini", default is "gini".</p> +</td></tr> +<tr valign="top"><td><code>seed</code></td> +<td> +<p>integer seed for random number generation.</p> +</td></tr> +<tr valign="top"><td><code>minInstancesPerNode</code></td> +<td> +<p>Minimum number of instances each child must have after split.</p> +</td></tr> +<tr valign="top"><td><code>minInfoGain</code></td> +<td> +<p>Minimum information gain for a split to be considered at a tree node.</p> +</td></tr> +<tr valign="top"><td><code>checkpointInterval</code></td> +<td> +<p>Param for set checkpoint interval (>= 1) or disable checkpoint (-1). +Note: this setting will be ignored if the checkpoint directory is not +set.</p> +</td></tr> +<tr valign="top"><td><code>maxMemoryInMB</code></td> +<td> +<p>Maximum memory in MB allocated to histogram aggregation.</p> +</td></tr> +<tr valign="top"><td><code>cacheNodeIds</code></td> +<td> +<p>If FALSE, the algorithm will pass trees to executors to match instances with +nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching +can speed up training of deeper trees. Users can set how often should the +cache be checkpointed or disable it by setting checkpointInterval.</p> +</td></tr> +<tr valign="top"><td><code>handleInvalid</code></td> +<td> +<p>How to handle invalid data (unseen labels or NULL values) in features and +label column of string type in classification model. +Supported options: "skip" (filter out rows with invalid data), +"error" (throw an error), "keep" (put invalid data in +a special additional bucket, at index numLabels). 
Default +is "error".</p> +</td></tr> +<tr valign="top"><td><code>object</code></td> +<td> +<p>A fitted Decision Tree regression model or classification model.</p> +</td></tr> +<tr valign="top"><td><code>x</code></td> +<td> +<p>summary object of Decision Tree regression model or classification model +returned by <code>summary</code>.</p> +</td></tr> +<tr valign="top"><td><code>newData</code></td> +<td> +<p>a SparkDataFrame for testing.</p> +</td></tr> +<tr valign="top"><td><code>path</code></td> +<td> +<p>The directory where the model is saved.</p> +</td></tr> +<tr valign="top"><td><code>overwrite</code></td> +<td> +<p>Overwrites or not if the output path already exists. Default is FALSE +which means throw exception if the output path exists.</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p><code>spark.decisionTree</code> returns a fitted Decision Tree model. +</p> +<p><code>summary</code> returns summary information of the fitted model, which is a list. +The list of components includes <code>formula</code> (formula), +<code>numFeatures</code> (number of features), <code>features</code> (list of features), +<code>featureImportances</code> (feature importances), and <code>maxDepth</code> (max depth of +trees). +</p> +<p><code>predict</code> returns a SparkDataFrame containing predicted labels in a column named +"prediction". 
+</p> + + +<h3>Note</h3> + +<p>spark.decisionTree since 2.3.0 +</p> +<p>summary(DecisionTreeRegressionModel) since 2.3.0 +</p> +<p>print.summary.DecisionTreeRegressionModel since 2.3.0 +</p> +<p>summary(DecisionTreeClassificationModel) since 2.3.0 +</p> +<p>print.summary.DecisionTreeClassificationModel since 2.3.0 +</p> +<p>predict(DecisionTreeRegressionModel) since 2.3.0 +</p> +<p>predict(DecisionTreeClassificationModel) since 2.3.0 +</p> +<p>write.ml(DecisionTreeRegressionModel, character) since 2.3.0 +</p> +<p>write.ml(DecisionTreeClassificationModel, character) since 2.3.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D # fit a Decision Tree Regression Model +##D df <- createDataFrame(longley) +##D model <- spark.decisionTree(df, Employed ~ ., type = "regression", maxDepth = 5, maxBins = 16) +##D +##D # get the summary of the model +##D summary(model) +##D +##D # make predictions +##D predictions <- predict(model, df) +##D +##D # save and load the model +##D path <- "path/to/model" +##D write.ml(model, path) +##D savedModel <- read.ml(path) +##D summary(savedModel) +##D +##D # fit a Decision Tree Classification Model +##D t <- as.data.frame(Titanic) +##D df <- createDataFrame(t) +##D model <- spark.decisionTree(df, Survived ~ Freq + Age, "classification") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.fpGrowth.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.fpGrowth.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.fpGrowth.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,180 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: FP-growth</title> +<meta 
http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.fpGrowth {SparkR}"><tr><td>spark.fpGrowth {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>FP-growth</h2> + +<h3>Description</h3> + +<p>A parallel FP-growth algorithm to mine frequent itemsets. +<code>spark.fpGrowth</code> fits a FP-growth model on a SparkDataFrame. Users can call +<code>spark.freqItemsets</code> to get frequent itemsets, <code>spark.associationRules</code> to get +association rules, <code>predict</code> to make predictions on new data based on generated association +rules, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models. +For more details, see +<a href="https://spark.apache.org/docs/latest/mllib-frequent-pattern-mining.html#fp-growth"> +FP-growth</a>. +</p> + + +<h3>Usage</h3> + +<pre> +spark.fpGrowth(data, ...) 
+ +spark.freqItemsets(object) + +spark.associationRules(object) + +## S4 method for signature 'SparkDataFrame' +spark.fpGrowth(data, minSupport = 0.3, + minConfidence = 0.8, itemsCol = "items", numPartitions = NULL) + +## S4 method for signature 'FPGrowthModel' +spark.freqItemsets(object) + +## S4 method for signature 'FPGrowthModel' +spark.associationRules(object) + +## S4 method for signature 'FPGrowthModel' +predict(object, newData) + +## S4 method for signature 'FPGrowthModel,character' +write.ml(object, path, overwrite = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>data</code></td> +<td> +<p>A SparkDataFrame for training.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>additional argument(s) passed to the method.</p> +</td></tr> +<tr valign="top"><td><code>object</code></td> +<td> +<p>a fitted FPGrowth model.</p> +</td></tr> +<tr valign="top"><td><code>minSupport</code></td> +<td> +<p>Minimal support level.</p> +</td></tr> +<tr valign="top"><td><code>minConfidence</code></td> +<td> +<p>Minimal confidence level.</p> +</td></tr> +<tr valign="top"><td><code>itemsCol</code></td> +<td> +<p>Features column name.</p> +</td></tr> +<tr valign="top"><td><code>numPartitions</code></td> +<td> +<p>Number of partitions used for fitting.</p> +</td></tr> +<tr valign="top"><td><code>newData</code></td> +<td> +<p>a SparkDataFrame for testing.</p> +</td></tr> +<tr valign="top"><td><code>path</code></td> +<td> +<p>the directory where the model is saved.</p> +</td></tr> +<tr valign="top"><td><code>overwrite</code></td> +<td> +<p>logical value indicating whether to overwrite if the output path +already exists. Default is FALSE which means throw exception +if the output path exists.</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p><code>spark.fpGrowth</code> returns a fitted FPGrowth model. +</p> +<p>A <code>SparkDataFrame</code> with frequent itemsets. 
+The <code>SparkDataFrame</code> contains two columns: +<code>items</code> (an array of the same type as the input column) +and <code>freq</code> (frequency of the itemset). +</p> +<p>A <code>SparkDataFrame</code> with association rules. +The <code>SparkDataFrame</code> contains three columns: +<code>antecedent</code> (an array of the same type as the input column), +<code>consequent</code> (an array of the same type as the input column), +and <code>confidence</code> (confidence). +</p> +<p><code>predict</code> returns a SparkDataFrame containing predicted values. +</p> + + +<h3>Note</h3> + +<p>spark.fpGrowth since 2.2.0 +</p> +<p>spark.freqItemsets(FPGrowthModel) since 2.2.0 +</p> +<p>spark.associationRules(FPGrowthModel) since 2.2.0 +</p> +<p>predict(FPGrowthModel) since 2.2.0 +</p> +<p>write.ml(FPGrowthModel, character) since 2.2.0 +</p> + + +<h3>See Also</h3> + +<p><a href="read.ml.html">read.ml</a> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D raw_data <- read.df( +##D "data/mllib/sample_fpgrowth.txt", +##D source = "csv", +##D schema = structType(structField("raw_items", "string"))) +##D +##D data <- selectExpr(raw_data, "split(raw_items, ' ') as items") +##D model <- spark.fpGrowth(data) +##D +##D # Show frequent itemsets +##D frequent_itemsets <- spark.freqItemsets(model) +##D showDF(frequent_itemsets) +##D +##D # Show association rules +##D association_rules <- spark.associationRules(model) +##D showDF(association_rules) +##D +##D # Predict on new data +##D new_itemsets <- data.frame(items = c("t", "t,s")) +##D new_data <- selectExpr(createDataFrame(new_itemsets), "split(items, ',') as items") +##D predict(model, new_data) +##D +##D # Save and load model +##D path <- "/path/to/model" +##D write.ml(model, path) +##D read.ml(path) +##D +##D # Optional arguments +##D baskets_data <- selectExpr(createDataFrame(itemsets), "split(items, ',') as baskets") +##D another_model <- spark.fpGrowth(data, minSupport = 0.1, minConfidence = 0.5, 
+##D itemsCol = "baskets", numPartitions = 10) +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.gaussianMixture.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.gaussianMixture.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.gaussianMixture.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,156 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Multivariate Gaussian Mixture Model (GMM)</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.gaussianMixture {SparkR}"><tr><td>spark.gaussianMixture {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Multivariate Gaussian Mixture Model (GMM)</h2> + +<h3>Description</h3> + +<p>Fits multivariate gaussian mixture model against a SparkDataFrame, similarly to R's +mvnormalmixEM(). Users can call <code>summary</code> to print a summary of the fitted model, +<code>predict</code> to make predictions on new data, and <code>write.ml</code>/<code>read.ml</code> +to save/load fitted models. +</p> + + +<h3>Usage</h3> + +<pre> +spark.gaussianMixture(data, formula, ...) 
+ +## S4 method for signature 'SparkDataFrame,formula' +spark.gaussianMixture(data, formula, k = 2, + maxIter = 100, tol = 0.01) + +## S4 method for signature 'GaussianMixtureModel' +summary(object) + +## S4 method for signature 'GaussianMixtureModel' +predict(object, newData) + +## S4 method for signature 'GaussianMixtureModel,character' +write.ml(object, path, + overwrite = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>data</code></td> +<td> +<p>a SparkDataFrame for training.</p> +</td></tr> +<tr valign="top"><td><code>formula</code></td> +<td> +<p>a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', '.', ':', '+', and '-'. +Note that the response variable of formula is empty in spark.gaussianMixture.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>additional arguments passed to the method.</p> +</td></tr> +<tr valign="top"><td><code>k</code></td> +<td> +<p>number of independent Gaussians in the mixture model.</p> +</td></tr> +<tr valign="top"><td><code>maxIter</code></td> +<td> +<p>maximum iteration number.</p> +</td></tr> +<tr valign="top"><td><code>tol</code></td> +<td> +<p>the convergence tolerance.</p> +</td></tr> +<tr valign="top"><td><code>object</code></td> +<td> +<p>a fitted gaussian mixture model.</p> +</td></tr> +<tr valign="top"><td><code>newData</code></td> +<td> +<p>a SparkDataFrame for testing.</p> +</td></tr> +<tr valign="top"><td><code>path</code></td> +<td> +<p>the directory where the model is saved.</p> +</td></tr> +<tr valign="top"><td><code>overwrite</code></td> +<td> +<p>overwrites or not if the output path already exists. Default is FALSE +which means throw exception if the output path exists.</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p><code>spark.gaussianMixture</code> returns a fitted multivariate gaussian mixture model. 
+</p> +<p><code>summary</code> returns summary of the fitted model, which is a list. +The list includes the model's <code>lambda</code> (lambda), <code>mu</code> (mu), +<code>sigma</code> (sigma), <code>loglik</code> (loglik), and <code>posterior</code> (posterior). +</p> +<p><code>predict</code> returns a SparkDataFrame containing predicted labels in a column named +"prediction". +</p> + + +<h3>Note</h3> + +<p>spark.gaussianMixture since 2.1.0 +</p> +<p>summary(GaussianMixtureModel) since 2.1.0 +</p> +<p>predict(GaussianMixtureModel) since 2.1.0 +</p> +<p>write.ml(GaussianMixtureModel, character) since 2.1.0 +</p> + + +<h3>See Also</h3> + +<p>mixtools: <a href="https://cran.r-project.org/package=mixtools">https://cran.r-project.org/package=mixtools</a> +</p> +<p><a href="predict.html">predict</a>, <a href="read.ml.html">read.ml</a>, <a href="write.ml.html">write.ml</a> +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D sparkR.session() +##D library(mvtnorm) +##D set.seed(100) +##D a <- rmvnorm(4, c(0, 0)) +##D b <- rmvnorm(6, c(3, 4)) +##D data <- rbind(a, b) +##D df <- createDataFrame(as.data.frame(data)) +##D model <- spark.gaussianMixture(df, ~ V1 + V2, k = 2) +##D summary(model) +##D +##D # fitted values on training data +##D fitted <- predict(model, df) +##D head(select(fitted, "V1", "prediction")) +##D +##D # save fitted model to input path +##D path <- "path/to/model" +##D write.ml(model, path) +##D +##D # can also read back the saved model and print +##D savedModel <- read.ml(path) +##D summary(savedModel) +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.gbt.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.gbt.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.gbt.html Sat Jan 
13 10:29:47 2018 @@ -0,0 +1,257 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Gradient Boosted Tree Model for Regression and Classification</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.gbt {SparkR}"><tr><td>spark.gbt {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Gradient Boosted Tree Model for Regression and Classification</h2> + +<h3>Description</h3> + +<p><code>spark.gbt</code> fits a Gradient Boosted Tree Regression model or Classification model on a +SparkDataFrame. Users can call <code>summary</code> to get a summary of the fitted +Gradient Boosted Tree model, <code>predict</code> to make predictions on new data, and +<code>write.ml</code>/<code>read.ml</code> to save/load fitted models. +For more details, see +<a href="http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-regression"> +GBT Regression</a> and +<a href="http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-classifier"> +GBT Classification</a> +</p> + + +<h3>Usage</h3> + +<pre> +spark.gbt(data, formula, ...) 
+ +## S4 method for signature 'SparkDataFrame,formula' +spark.gbt(data, formula, + type = c("regression", "classification"), maxDepth = 5, maxBins = 32, + maxIter = 20, stepSize = 0.1, lossType = NULL, seed = NULL, + subsamplingRate = 1, minInstancesPerNode = 1, minInfoGain = 0, + checkpointInterval = 10, maxMemoryInMB = 256, cacheNodeIds = FALSE, + handleInvalid = c("error", "keep", "skip")) + +## S4 method for signature 'GBTRegressionModel' +summary(object) + +## S3 method for class 'summary.GBTRegressionModel' +print(x, ...) + +## S4 method for signature 'GBTClassificationModel' +summary(object) + +## S3 method for class 'summary.GBTClassificationModel' +print(x, ...) + +## S4 method for signature 'GBTRegressionModel' +predict(object, newData) + +## S4 method for signature 'GBTClassificationModel' +predict(object, newData) + +## S4 method for signature 'GBTRegressionModel,character' +write.ml(object, path, + overwrite = FALSE) + +## S4 method for signature 'GBTClassificationModel,character' +write.ml(object, path, + overwrite = FALSE) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>data</code></td> +<td> +<p>a SparkDataFrame for training.</p> +</td></tr> +<tr valign="top"><td><code>formula</code></td> +<td> +<p>a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', ':', '+', and '-'.</p> +</td></tr> +<tr valign="top"><td><code>...</code></td> +<td> +<p>additional arguments passed to the method.</p> +</td></tr> +<tr valign="top"><td><code>type</code></td> +<td> +<p>type of model, one of "regression" or "classification", to fit</p> +</td></tr> +<tr valign="top"><td><code>maxDepth</code></td> +<td> +<p>Maximum depth of the tree (>= 0).</p> +</td></tr> +<tr valign="top"><td><code>maxBins</code></td> +<td> +<p>Maximum number of bins used for discretizing continuous features and for choosing +how to split on features at each node. 
More bins give higher granularity. Must be +>= 2 and >= number of categories in any categorical feature.</p> +</td></tr> +<tr valign="top"><td><code>maxIter</code></td> +<td> +<p>Param for maximum number of iterations (>= 0).</p> +</td></tr> +<tr valign="top"><td><code>stepSize</code></td> +<td> +<p>Param for Step size to be used for each iteration of optimization.</p> +</td></tr> +<tr valign="top"><td><code>lossType</code></td> +<td> +<p>Loss function which GBT tries to minimize. +For classification, must be "logistic". For regression, must be one of +"squared" (L2) and "absolute" (L1), default is "squared".</p> +</td></tr> +<tr valign="top"><td><code>seed</code></td> +<td> +<p>integer seed for random number generation.</p> +</td></tr> +<tr valign="top"><td><code>subsamplingRate</code></td> +<td> +<p>Fraction of the training data used for learning each decision tree, in +range (0, 1].</p> +</td></tr> +<tr valign="top"><td><code>minInstancesPerNode</code></td> +<td> +<p>Minimum number of instances each child must have after split. If a +split causes the left or right child to have fewer than +minInstancesPerNode, the split will be discarded as invalid. Should be +>= 1.</p> +</td></tr> +<tr valign="top"><td><code>minInfoGain</code></td> +<td> +<p>Minimum information gain for a split to be considered at a tree node.</p> +</td></tr> +<tr valign="top"><td><code>checkpointInterval</code></td> +<td> +<p>Param for setting the checkpoint interval (>= 1) or disabling checkpointing (-1). +Note: this setting will be ignored if the checkpoint directory is not +set.</p> +</td></tr> +<tr valign="top"><td><code>maxMemoryInMB</code></td> +<td> +<p>Maximum memory in MB allocated to histogram aggregation.</p> +</td></tr> +<tr valign="top"><td><code>cacheNodeIds</code></td> +<td> +<p>If FALSE, the algorithm will pass trees to executors to match instances with +nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching +can speed up training of deeper trees. 
Users can set how often the +cache should be checkpointed or disable it by setting checkpointInterval.</p> +</td></tr> +<tr valign="top"><td><code>handleInvalid</code></td> +<td> +<p>How to handle invalid data (unseen labels or NULL values) in features and +label column of string type in classification model. +Supported options: "skip" (filter out rows with invalid data), +"error" (throw an error), "keep" (put invalid data in +a special additional bucket, at index numLabels). Default +is "error".</p> +</td></tr> +<tr valign="top"><td><code>object</code></td> +<td> +<p>A fitted Gradient Boosted Tree regression model or classification model.</p> +</td></tr> +<tr valign="top"><td><code>x</code></td> +<td> +<p>summary object of Gradient Boosted Tree regression model or classification model +returned by <code>summary</code>.</p> +</td></tr> +<tr valign="top"><td><code>newData</code></td> +<td> +<p>a SparkDataFrame for testing.</p> +</td></tr> +<tr valign="top"><td><code>path</code></td> +<td> +<p>The directory where the model is saved.</p> +</td></tr> +<tr valign="top"><td><code>overwrite</code></td> +<td> +<p>Overwrites or not if the output path already exists. Default is FALSE +which means throw exception if the output path exists.</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p><code>spark.gbt</code> returns a fitted Gradient Boosted Tree model. +</p> +<p><code>summary</code> returns summary information of the fitted model, which is a list. +The list of components includes <code>formula</code> (formula), +<code>numFeatures</code> (number of features), <code>features</code> (list of features), +<code>featureImportances</code> (feature importances), <code>maxDepth</code> (max depth of trees), +<code>numTrees</code> (number of trees), and <code>treeWeights</code> (tree weights). +</p> +<p><code>predict</code> returns a SparkDataFrame containing predicted labels in a column named +"prediction". 
+</p> + + +<h3>Note</h3> + +<p>spark.gbt since 2.1.0 +</p> +<p>summary(GBTRegressionModel) since 2.1.0 +</p> +<p>print.summary.GBTRegressionModel since 2.1.0 +</p> +<p>summary(GBTClassificationModel) since 2.1.0 +</p> +<p>print.summary.GBTClassificationModel since 2.1.0 +</p> +<p>predict(GBTRegressionModel) since 2.1.0 +</p> +<p>predict(GBTClassificationModel) since 2.1.0 +</p> +<p>write.ml(GBTRegressionModel, character) since 2.1.0 +</p> +<p>write.ml(GBTClassificationModel, character) since 2.1.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D # fit a Gradient Boosted Tree Regression Model +##D df <- createDataFrame(longley) +##D model <- spark.gbt(df, Employed ~ ., type = "regression", maxDepth = 5, maxBins = 16) +##D +##D # get the summary of the model +##D summary(model) +##D +##D # make predictions +##D predictions <- predict(model, df) +##D +##D # save and load the model +##D path <- "path/to/model" +##D write.ml(model, path) +##D savedModel <- read.ml(path) +##D summary(savedModel) +##D +##D # fit a Gradient Boosted Tree Classification Model +##D # label must be binary - Only binary classification is supported for GBT. 
+##D t <- as.data.frame(Titanic) +##D df <- createDataFrame(t) +##D model <- spark.gbt(df, Survived ~ Age + Freq, "classification") +##D +##D # numeric label is also supported +##D t2 <- as.data.frame(Titanic) +##D t2$NumericGender <- ifelse(t2$Sex == "Male", 0, 1) +##D df <- createDataFrame(t2) +##D model <- spark.gbt(df, NumericGender ~ ., type = "classification") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.getSparkFiles.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.getSparkFiles.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.getSparkFiles.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,59 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Get the absolute path of a file added through spark.addFile.</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.getSparkFiles {SparkR}"><tr><td>spark.getSparkFiles {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + +<h2>Get the absolute path of a file added through spark.addFile.</h2> + +<h3>Description</h3> + +<p>Get the absolute path of a file added through spark.addFile. 
+</p> + + +<h3>Usage</h3> + +<pre> +spark.getSparkFiles(fileName) +</pre> + + +<h3>Arguments</h3> + +<table summary="R argblock"> +<tr valign="top"><td><code>fileName</code></td> +<td> +<p>The name of the file added through spark.addFile</p> +</td></tr> +</table> + + +<h3>Value</h3> + +<p>the absolute path of a file added through spark.addFile. +</p> + + +<h3>Note</h3> + +<p>spark.getSparkFiles since 2.1.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D spark.getSparkFiles("myfile") +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> Added: dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.getSparkFilesRootDirectory.html ============================================================================== --- dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.getSparkFilesRootDirectory.html (added) +++ dev/spark/v2.3.0-rc1-docs/_site/api/R/spark.getSparkFilesRootDirectory.html Sat Jan 13 10:29:47 2018 @@ -0,0 +1,49 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title>R: Get the root directory that contains files added through...</title> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" type="text/css" href="R.css" /> + +<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css"> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script> +<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script> +<script>hljs.initHighlightingOnLoad();</script> +</head><body> + +<table width="100%" summary="page for spark.getSparkFilesRootDirectory {SparkR}"><tr><td>spark.getSparkFilesRootDirectory {SparkR}</td><td style="text-align: right;">R Documentation</td></tr></table> + 
+<h2>Get the root directory that contains files added through spark.addFile.</h2> + +<h3>Description</h3> + +<p>Get the root directory that contains files added through spark.addFile. +</p> + + +<h3>Usage</h3> + +<pre> +spark.getSparkFilesRootDirectory() +</pre> + + +<h3>Value</h3> + +<p>the root directory that contains files added through spark.addFile +</p> + + +<h3>Note</h3> + +<p>spark.getSparkFilesRootDirectory since 2.1.0 +</p> + + +<h3>Examples</h3> + +<pre><code class="r">## Not run: +##D spark.getSparkFilesRootDirectory() +## End(Not run) +</code></pre> + + +<hr /><div style="text-align: center;">[Package <em>SparkR</em> version 2.3.0 <a href="00Index.html">Index</a>]</div> +</body></html> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org