http://git-wip-us.apache.org/repos/asf/spark-website/blob/d2bcf185/site/docs/2.1.0/ml-migration-guides.html
----------------------------------------------------------------------
diff --git a/site/docs/2.1.0/ml-migration-guides.html 
b/site/docs/2.1.0/ml-migration-guides.html
index 5e8a913..24dfc31 100644
--- a/site/docs/2.1.0/ml-migration-guides.html
+++ b/site/docs/2.1.0/ml-migration-guides.html
@@ -344,21 +344,21 @@ for converting to <code>mllib.linalg</code> types.</p>
 <div class="codetabs">
 <div data-lang="scala">
 
-    <div class="highlight"><pre><code class="language-scala" 
data-lang="scala"><span class="k">import</span> <span 
class="nn">org.apache.spark.mllib.util.MLUtils</span>
+    <figure class="highlight"><pre><code class="language-scala" 
data-lang="scala"><span></span><span class="k">import</span> <span 
class="nn">org.apache.spark.mllib.util.MLUtils</span>
 
 <span class="c1">// convert DataFrame columns</span>
 <span class="k">val</span> <span class="n">convertedVecDF</span> <span 
class="k">=</span> <span class="nc">MLUtils</span><span class="o">.</span><span 
class="n">convertVectorColumnsToML</span><span class="o">(</span><span 
class="n">vecDF</span><span class="o">)</span>
 <span class="k">val</span> <span class="n">convertedMatrixDF</span> <span 
class="k">=</span> <span class="nc">MLUtils</span><span class="o">.</span><span 
class="n">convertMatrixColumnsToML</span><span class="o">(</span><span 
class="n">matrixDF</span><span class="o">)</span>
 <span class="c1">// convert a single vector or matrix</span>
 <span class="k">val</span> <span class="n">mlVec</span><span 
class="k">:</span> <span class="kt">org.apache.spark.ml.linalg.Vector</span> 
<span class="o">=</span> <span class="n">mllibVec</span><span 
class="o">.</span><span class="n">asML</span>
-<span class="k">val</span> <span class="n">mlMat</span><span 
class="k">:</span> <span class="kt">org.apache.spark.ml.linalg.Matrix</span> 
<span class="o">=</span> <span class="n">mllibMat</span><span 
class="o">.</span><span class="n">asML</span></code></pre></div>
+<span class="k">val</span> <span class="n">mlMat</span><span 
class="k">:</span> <span class="kt">org.apache.spark.ml.linalg.Matrix</span> 
<span class="o">=</span> <span class="n">mllibMat</span><span 
class="o">.</span><span class="n">asML</span></code></pre></figure>
 
     <p>Refer to the <a 
href="api/scala/index.html#org.apache.spark.mllib.util.MLUtils$"><code>MLUtils</code>
 Scala docs</a> for further detail.</p>
   </div>
 
 <div data-lang="java">
 
-    <div class="highlight"><pre><code class="language-java" 
data-lang="java"><span class="kn">import</span> <span 
class="nn">org.apache.spark.mllib.util.MLUtils</span><span class="o">;</span>
+    <figure class="highlight"><pre><code class="language-java" 
data-lang="java"><span></span><span class="kn">import</span> <span 
class="nn">org.apache.spark.mllib.util.MLUtils</span><span class="o">;</span>
 <span class="kn">import</span> <span 
class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
 
 <span class="c1">// convert DataFrame columns</span>
@@ -366,21 +366,21 @@ for converting to <code>mllib.linalg</code> types.</p>
 <span class="n">Dataset</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span 
class="n">convertedMatrixDF</span> <span class="o">=</span> <span 
class="n">MLUtils</span><span class="o">.</span><span 
class="na">convertMatrixColumnsToML</span><span class="o">(</span><span 
class="n">matrixDF</span><span class="o">);</span>
 <span class="c1">// convert a single vector or matrix</span>
 <span class="n">org</span><span class="o">.</span><span 
class="na">apache</span><span class="o">.</span><span 
class="na">spark</span><span class="o">.</span><span class="na">ml</span><span 
class="o">.</span><span class="na">linalg</span><span class="o">.</span><span 
class="na">Vector</span> <span class="n">mlVec</span> <span class="o">=</span> 
<span class="n">mllibVec</span><span class="o">.</span><span 
class="na">asML</span><span class="o">();</span>
-<span class="n">org</span><span class="o">.</span><span 
class="na">apache</span><span class="o">.</span><span 
class="na">spark</span><span class="o">.</span><span class="na">ml</span><span 
class="o">.</span><span class="na">linalg</span><span class="o">.</span><span 
class="na">Matrix</span> <span class="n">mlMat</span> <span class="o">=</span> 
<span class="n">mllibMat</span><span class="o">.</span><span 
class="na">asML</span><span class="o">();</span></code></pre></div>
+<span class="n">org</span><span class="o">.</span><span 
class="na">apache</span><span class="o">.</span><span 
class="na">spark</span><span class="o">.</span><span class="na">ml</span><span 
class="o">.</span><span class="na">linalg</span><span class="o">.</span><span 
class="na">Matrix</span> <span class="n">mlMat</span> <span class="o">=</span> 
<span class="n">mllibMat</span><span class="o">.</span><span 
class="na">asML</span><span class="o">();</span></code></pre></figure>
 
     <p>Refer to the <a 
href="api/java/org/apache/spark/mllib/util/MLUtils.html"><code>MLUtils</code> 
Java docs</a> for further detail.</p>
   </div>
 
 <div data-lang="python">
 
-    <div class="highlight"><pre><code class="language-python" 
data-lang="python"><span class="kn">from</span> <span 
class="nn">pyspark.mllib.util</span> <span class="kn">import</span> <span 
class="n">MLUtils</span>
+    <figure class="highlight"><pre><code class="language-python" 
data-lang="python"><span></span><span class="kn">from</span> <span 
class="nn">pyspark.mllib.util</span> <span class="kn">import</span> <span 
class="n">MLUtils</span>
 
-<span class="c"># convert DataFrame columns</span>
+<span class="c1"># convert DataFrame columns</span>
 <span class="n">convertedVecDF</span> <span class="o">=</span> <span 
class="n">MLUtils</span><span class="o">.</span><span 
class="n">convertVectorColumnsToML</span><span class="p">(</span><span 
class="n">vecDF</span><span class="p">)</span>
 <span class="n">convertedMatrixDF</span> <span class="o">=</span> <span 
class="n">MLUtils</span><span class="o">.</span><span 
class="n">convertMatrixColumnsToML</span><span class="p">(</span><span 
class="n">matrixDF</span><span class="p">)</span>
-<span class="c"># convert a single vector or matrix</span>
+<span class="c1"># convert a single vector or matrix</span>
 <span class="n">mlVec</span> <span class="o">=</span> <span 
class="n">mllibVec</span><span class="o">.</span><span 
class="n">asML</span><span class="p">()</span>
-<span class="n">mlMat</span> <span class="o">=</span> <span 
class="n">mllibMat</span><span class="o">.</span><span 
class="n">asML</span><span class="p">()</span></code></pre></div>
+<span class="n">mlMat</span> <span class="o">=</span> <span 
class="n">mllibMat</span><span class="o">.</span><span 
class="n">asML</span><span class="p">()</span></code></pre></figure>
 
     <p>Refer to the <a 
href="api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils"><code>MLUtils</code>
 Python docs</a> for further detail.</p>
   </div>
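
    (Stripped of the highlighting markup, the Scala tab rendered by the hunk above is the usual mllib-to-ml linalg conversion; roughly the following, where vecDF, matrixDF, mllibVec and mllibMat are the placeholder values used by the guide itself:)

        import org.apache.spark.mllib.util.MLUtils

        // Convert DataFrame columns holding mllib.linalg types to ml.linalg types.
        val convertedVecDF = MLUtils.convertVectorColumnsToML(vecDF)
        val convertedMatrixDF = MLUtils.convertMatrixColumnsToML(matrixDF)
        // Convert a single vector or matrix via asML.
        val mlVec: org.apache.spark.ml.linalg.Vector = mllibVec.asML
        val mlMat: org.apache.spark.ml.linalg.Matrix = mllibMat.asML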

http://git-wip-us.apache.org/repos/asf/spark-website/blob/d2bcf185/site/docs/2.1.0/ml-pipeline.html
----------------------------------------------------------------------
diff --git a/site/docs/2.1.0/ml-pipeline.html b/site/docs/2.1.0/ml-pipeline.html
index fe17564..b57afde 100644
--- a/site/docs/2.1.0/ml-pipeline.html
+++ b/site/docs/2.1.0/ml-pipeline.html
@@ -331,27 +331,27 @@ machine learning pipelines.</p>
 <p><strong>Table of Contents</strong></p>
 
 <ul id="markdown-toc">
-  <li><a href="#main-concepts-in-pipelines" 
id="markdown-toc-main-concepts-in-pipelines">Main concepts in Pipelines</a>    
<ul>
-      <li><a href="#dataframe" id="markdown-toc-dataframe">DataFrame</a></li>
-      <li><a href="#pipeline-components" 
id="markdown-toc-pipeline-components">Pipeline components</a>        <ul>
-          <li><a href="#transformers" 
id="markdown-toc-transformers">Transformers</a></li>
-          <li><a href="#estimators" 
id="markdown-toc-estimators">Estimators</a></li>
-          <li><a href="#properties-of-pipeline-components" 
id="markdown-toc-properties-of-pipeline-components">Properties of pipeline 
components</a></li>
+  <li><a href="#main-concepts-in-pipelines">Main concepts in Pipelines</a>    
<ul>
+      <li><a href="#dataframe">DataFrame</a></li>
+      <li><a href="#pipeline-components">Pipeline components</a>        <ul>
+          <li><a href="#transformers">Transformers</a></li>
+          <li><a href="#estimators">Estimators</a></li>
+          <li><a href="#properties-of-pipeline-components">Properties of 
pipeline components</a></li>
         </ul>
       </li>
-      <li><a href="#pipeline" id="markdown-toc-pipeline">Pipeline</a>        
<ul>
-          <li><a href="#how-it-works" id="markdown-toc-how-it-works">How it 
works</a></li>
-          <li><a href="#details" id="markdown-toc-details">Details</a></li>
+      <li><a href="#pipeline">Pipeline</a>        <ul>
+          <li><a href="#how-it-works">How it works</a></li>
+          <li><a href="#details">Details</a></li>
         </ul>
       </li>
-      <li><a href="#parameters" 
id="markdown-toc-parameters">Parameters</a></li>
-      <li><a href="#saving-and-loading-pipelines" 
id="markdown-toc-saving-and-loading-pipelines">Saving and Loading 
Pipelines</a></li>
+      <li><a href="#parameters">Parameters</a></li>
+      <li><a href="#saving-and-loading-pipelines">Saving and Loading 
Pipelines</a></li>
     </ul>
   </li>
-  <li><a href="#code-examples" id="markdown-toc-code-examples">Code 
examples</a>    <ul>
-      <li><a href="#example-estimator-transformer-and-param" 
id="markdown-toc-example-estimator-transformer-and-param">Example: Estimator, 
Transformer, and Param</a></li>
-      <li><a href="#example-pipeline" 
id="markdown-toc-example-pipeline">Example: Pipeline</a></li>
-      <li><a href="#model-selection-hyperparameter-tuning" 
id="markdown-toc-model-selection-hyperparameter-tuning">Model selection 
(hyperparameter tuning)</a></li>
+  <li><a href="#code-examples">Code examples</a>    <ul>
+      <li><a href="#example-estimator-transformer-and-param">Example: 
Estimator, Transformer, and Param</a></li>
+      <li><a href="#example-pipeline">Example: Pipeline</a></li>
+      <li><a href="#model-selection-hyperparameter-tuning">Model selection 
(hyperparameter tuning)</a></li>
     </ul>
   </li>
 </ul>
@@ -541,7 +541,7 @@ Refer to the [`Estimator` Scala 
docs](api/scala/index.html#org.apache.spark.ml.E
 the [`Transformer` Scala 
docs](api/scala/index.html#org.apache.spark.ml.Transformer) and
 the [`Params` Scala 
docs](api/scala/index.html#org.apache.spark.ml.param.Params) for details on the 
API.
 
-<div class="highlight"><pre><span class="k">import</span> <span 
class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
+<div class="highlight"><pre><span></span><span class="k">import</span> <span 
class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
 <span class="k">import</span> <span 
class="nn">org.apache.spark.ml.linalg.</span><span class="o">{</span><span 
class="nc">Vector</span><span class="o">,</span> <span 
class="nc">Vectors</span><span class="o">}</span>
 <span class="k">import</span> <span 
class="nn">org.apache.spark.ml.param.ParamMap</span>
 <span class="k">import</span> <span class="nn">org.apache.spark.sql.Row</span>
@@ -601,7 +601,7 @@ the [`Params` Scala 
docs](api/scala/index.html#org.apache.spark.ml.param.Params)
   <span class="o">.</span><span class="n">select</span><span 
class="o">(</span><span class="s">&quot;features&quot;</span><span 
class="o">,</span> <span class="s">&quot;label&quot;</span><span 
class="o">,</span> <span class="s">&quot;myProbability&quot;</span><span 
class="o">,</span> <span class="s">&quot;prediction&quot;</span><span 
class="o">)</span>
   <span class="o">.</span><span class="n">collect</span><span 
class="o">()</span>
   <span class="o">.</span><span class="n">foreach</span> <span 
class="o">{</span> <span class="k">case</span> <span class="nc">Row</span><span 
class="o">(</span><span class="n">features</span><span class="k">:</span> <span 
class="kt">Vector</span><span class="o">,</span> <span 
class="n">label</span><span class="k">:</span> <span 
class="kt">Double</span><span class="o">,</span> <span 
class="n">prob</span><span class="k">:</span> <span 
class="kt">Vector</span><span class="o">,</span> <span 
class="n">prediction</span><span class="k">:</span> <span 
class="kt">Double</span><span class="o">)</span> <span class="k">=&gt;</span>
-    <span class="n">println</span><span class="o">(</span><span 
class="n">s</span><span class="s">&quot;($features, $label) -&gt; prob=$prob, 
prediction=$prediction&quot;</span><span class="o">)</span>
+    <span class="n">println</span><span class="o">(</span><span 
class="s">s&quot;(</span><span class="si">$features</span><span class="s">, 
</span><span class="si">$label</span><span class="s">) -&gt; prob=</span><span 
class="si">$prob</span><span class="s">, prediction=</span><span 
class="si">$prediction</span><span class="s">&quot;</span><span 
class="o">)</span>
   <span class="o">}</span>
 </pre></div><div><small>Find full example code at 
"examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala"
 in the Spark repo.</small></div>
 </div>
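
    (The only line whose markup changes in this hunk is the interpolated println from EstimatorTransformerParamExample.scala; the new highlighter splits the s"..." string into separate interpolation spans. De-rendered, the surrounding block reads roughly as follows, assuming the model2 and test values defined earlier in that example:)

        import org.apache.spark.ml.linalg.Vector
        import org.apache.spark.sql.Row

        // Print (features, label) -> (probability, prediction) for each test row.
        model2.transform(test)
          .select("features", "label", "myProbability", "prediction")
          .collect()
          .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
            println(s"($features, $label) -> prob=$prob, prediction=$prediction")
          }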
@@ -612,7 +612,7 @@ Refer to the [`Estimator` Java 
docs](api/java/org/apache/spark/ml/Estimator.html
 the [`Transformer` Java docs](api/java/org/apache/spark/ml/Transformer.html) 
and
 the [`Params` Java docs](api/java/org/apache/spark/ml/param/Params.html) for 
details on the API.
 
-<div class="highlight"><pre><span class="kn">import</span> <span 
class="nn">java.util.Arrays</span><span class="o">;</span>
+<div class="highlight"><pre><span></span><span class="kn">import</span> <span 
class="nn">java.util.Arrays</span><span class="o">;</span>
 <span class="kn">import</span> <span class="nn">java.util.List</span><span 
class="o">;</span>
 
 <span class="kn">import</span> <span 
class="nn">org.apache.spark.ml.classification.LogisticRegression</span><span 
class="o">;</span>
@@ -635,14 +635,14 @@ the [`Params` Java 
docs](api/java/org/apache/spark/ml/param/Params.html) for det
     <span class="n">RowFactory</span><span class="o">.</span><span 
class="na">create</span><span class="o">(</span><span 
class="mf">0.0</span><span class="o">,</span> <span 
class="n">Vectors</span><span class="o">.</span><span 
class="na">dense</span><span class="o">(</span><span class="mf">2.0</span><span 
class="o">,</span> <span class="mf">1.3</span><span class="o">,</span> <span 
class="mf">1.0</span><span class="o">)),</span>
     <span class="n">RowFactory</span><span class="o">.</span><span 
class="na">create</span><span class="o">(</span><span 
class="mf">1.0</span><span class="o">,</span> <span 
class="n">Vectors</span><span class="o">.</span><span 
class="na">dense</span><span class="o">(</span><span class="mf">0.0</span><span 
class="o">,</span> <span class="mf">1.2</span><span class="o">,</span> <span 
class="o">-</span><span class="mf">0.5</span><span class="o">))</span>
 <span class="o">);</span>
-<span class="n">StructType</span> <span class="n">schema</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">StructType</span><span class="o">(</span><span class="k">new</span> 
<span class="n">StructField</span><span class="o">[]{</span>
-    <span class="k">new</span> <span class="nf">StructField</span><span 
class="o">(</span><span class="s">&quot;label&quot;</span><span 
class="o">,</span> <span class="n">DataTypes</span><span 
class="o">.</span><span class="na">DoubleType</span><span class="o">,</span> 
<span class="kc">false</span><span class="o">,</span> <span 
class="n">Metadata</span><span class="o">.</span><span 
class="na">empty</span><span class="o">()),</span>
-    <span class="k">new</span> <span class="nf">StructField</span><span 
class="o">(</span><span class="s">&quot;features&quot;</span><span 
class="o">,</span> <span class="k">new</span> <span 
class="nf">VectorUDT</span><span class="o">(),</span> <span 
class="kc">false</span><span class="o">,</span> <span 
class="n">Metadata</span><span class="o">.</span><span 
class="na">empty</span><span class="o">())</span>
+<span class="n">StructType</span> <span class="n">schema</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">StructType</span><span class="o">(</span><span class="k">new</span> 
<span class="n">StructField</span><span class="o">[]{</span>
+    <span class="k">new</span> <span class="n">StructField</span><span 
class="o">(</span><span class="s">&quot;label&quot;</span><span 
class="o">,</span> <span class="n">DataTypes</span><span 
class="o">.</span><span class="na">DoubleType</span><span class="o">,</span> 
<span class="kc">false</span><span class="o">,</span> <span 
class="n">Metadata</span><span class="o">.</span><span 
class="na">empty</span><span class="o">()),</span>
+    <span class="k">new</span> <span class="n">StructField</span><span 
class="o">(</span><span class="s">&quot;features&quot;</span><span 
class="o">,</span> <span class="k">new</span> <span 
class="n">VectorUDT</span><span class="o">(),</span> <span 
class="kc">false</span><span class="o">,</span> <span 
class="n">Metadata</span><span class="o">.</span><span 
class="na">empty</span><span class="o">())</span>
 <span class="o">});</span>
 <span class="n">Dataset</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">training</span> 
<span class="o">=</span> <span class="n">spark</span><span 
class="o">.</span><span class="na">createDataFrame</span><span 
class="o">(</span><span class="n">dataTraining</span><span class="o">,</span> 
<span class="n">schema</span><span class="o">);</span>
 
 <span class="c1">// Create a LogisticRegression instance. This instance is an 
Estimator.</span>
-<span class="n">LogisticRegression</span> <span class="n">lr</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">LogisticRegression</span><span class="o">();</span>
+<span class="n">LogisticRegression</span> <span class="n">lr</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">LogisticRegression</span><span class="o">();</span>
 <span class="c1">// Print out the parameters, documentation, and any default 
values.</span>
 <span class="n">System</span><span class="o">.</span><span 
class="na">out</span><span class="o">.</span><span 
class="na">println</span><span class="o">(</span><span 
class="s">&quot;LogisticRegression parameters:\n&quot;</span> <span 
class="o">+</span> <span class="n">lr</span><span class="o">.</span><span 
class="na">explainParams</span><span class="o">()</span> <span 
class="o">+</span> <span class="s">&quot;\n&quot;</span><span 
class="o">);</span>
 
@@ -658,13 +658,13 @@ the [`Params` Java 
docs](api/java/org/apache/spark/ml/param/Params.html) for det
 <span class="n">System</span><span class="o">.</span><span 
class="na">out</span><span class="o">.</span><span 
class="na">println</span><span class="o">(</span><span class="s">&quot;Model 1 
was fit using parameters: &quot;</span> <span class="o">+</span> <span 
class="n">model1</span><span class="o">.</span><span 
class="na">parent</span><span class="o">().</span><span 
class="na">extractParamMap</span><span class="o">());</span>
 
 <span class="c1">// We may alternatively specify parameters using a 
ParamMap.</span>
-<span class="n">ParamMap</span> <span class="n">paramMap</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">ParamMap</span><span class="o">()</span>
+<span class="n">ParamMap</span> <span class="n">paramMap</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">ParamMap</span><span class="o">()</span>
   <span class="o">.</span><span class="na">put</span><span 
class="o">(</span><span class="n">lr</span><span class="o">.</span><span 
class="na">maxIter</span><span class="o">().</span><span 
class="na">w</span><span class="o">(</span><span class="mi">20</span><span 
class="o">))</span>  <span class="c1">// Specify 1 Param.</span>
   <span class="o">.</span><span class="na">put</span><span 
class="o">(</span><span class="n">lr</span><span class="o">.</span><span 
class="na">maxIter</span><span class="o">(),</span> <span 
class="mi">30</span><span class="o">)</span>  <span class="c1">// This 
overwrites the original maxIter.</span>
   <span class="o">.</span><span class="na">put</span><span 
class="o">(</span><span class="n">lr</span><span class="o">.</span><span 
class="na">regParam</span><span class="o">().</span><span 
class="na">w</span><span class="o">(</span><span class="mf">0.1</span><span 
class="o">),</span> <span class="n">lr</span><span class="o">.</span><span 
class="na">threshold</span><span class="o">().</span><span 
class="na">w</span><span class="o">(</span><span class="mf">0.55</span><span 
class="o">));</span>  <span class="c1">// Specify multiple Params.</span>
 
 <span class="c1">// One can also combine ParamMaps.</span>
-<span class="n">ParamMap</span> <span class="n">paramMap2</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">ParamMap</span><span class="o">()</span>
+<span class="n">ParamMap</span> <span class="n">paramMap2</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">ParamMap</span><span class="o">()</span>
   <span class="o">.</span><span class="na">put</span><span 
class="o">(</span><span class="n">lr</span><span class="o">.</span><span 
class="na">probabilityCol</span><span class="o">().</span><span 
class="na">w</span><span class="o">(</span><span 
class="s">&quot;myProbability&quot;</span><span class="o">));</span>  <span 
class="c1">// Change output column name</span>
 <span class="n">ParamMap</span> <span class="n">paramMapCombined</span> <span 
class="o">=</span> <span class="n">paramMap</span><span class="o">.</span><span 
class="na">$plus$plus</span><span class="o">(</span><span 
class="n">paramMap2</span><span class="o">);</span>
 
@@ -687,7 +687,7 @@ the [`Params` Java 
docs](api/java/org/apache/spark/ml/param/Params.html) for det
 <span class="c1">// &#39;probability&#39; column since we renamed the 
lr.probabilityCol parameter previously.</span>
 <span class="n">Dataset</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">results</span> 
<span class="o">=</span> <span class="n">model2</span><span 
class="o">.</span><span class="na">transform</span><span 
class="o">(</span><span class="n">test</span><span class="o">);</span>
 <span class="n">Dataset</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">rows</span> 
<span class="o">=</span> <span class="n">results</span><span 
class="o">.</span><span class="na">select</span><span class="o">(</span><span 
class="s">&quot;features&quot;</span><span class="o">,</span> <span 
class="s">&quot;label&quot;</span><span class="o">,</span> <span 
class="s">&quot;myProbability&quot;</span><span class="o">,</span> <span 
class="s">&quot;prediction&quot;</span><span class="o">);</span>
-<span class="k">for</span> <span class="o">(</span><span class="n">Row</span> 
<span class="nl">r:</span> <span class="n">rows</span><span 
class="o">.</span><span class="na">collectAsList</span><span 
class="o">())</span> <span class="o">{</span>
+<span class="k">for</span> <span class="o">(</span><span class="n">Row</span> 
<span class="n">r</span><span class="o">:</span> <span 
class="n">rows</span><span class="o">.</span><span 
class="na">collectAsList</span><span class="o">())</span> <span 
class="o">{</span>
   <span class="n">System</span><span class="o">.</span><span 
class="na">out</span><span class="o">.</span><span 
class="na">println</span><span class="o">(</span><span 
class="s">&quot;(&quot;</span> <span class="o">+</span> <span 
class="n">r</span><span class="o">.</span><span class="na">get</span><span 
class="o">(</span><span class="mi">0</span><span class="o">)</span> <span 
class="o">+</span> <span class="s">&quot;, &quot;</span> <span 
class="o">+</span> <span class="n">r</span><span class="o">.</span><span 
class="na">get</span><span class="o">(</span><span class="mi">1</span><span 
class="o">)</span> <span class="o">+</span> <span class="s">&quot;) -&gt; 
prob=&quot;</span> <span class="o">+</span> <span class="n">r</span><span 
class="o">.</span><span class="na">get</span><span class="o">(</span><span 
class="mi">2</span><span class="o">)</span>
     <span class="o">+</span> <span class="s">&quot;, prediction=&quot;</span> 
<span class="o">+</span> <span class="n">r</span><span class="o">.</span><span 
class="na">get</span><span class="o">(</span><span class="mi">3</span><span 
class="o">));</span>
 <span class="o">}</span>
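
    (For comparison with the Java ParamMap chain above, the Scala tab of this guide builds and merges ParamMaps directly; a sketch, assuming the lr estimator and training Dataset defined earlier in the example:)

        import org.apache.spark.ml.param.ParamMap

        // Specify parameters with a ParamMap instead of setter methods.
        val paramMap = ParamMap(lr.maxIter -> 20)
          .put(lr.maxIter, 30)                            // This overwrites the original maxIter.
          .put(lr.regParam -> 0.1, lr.threshold -> 0.55)  // Specify multiple Params.

        // ParamMaps can be combined; Scala's ++ is what the Java code above calls $plus$plus.
        val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability")  // Change output column name.
        val paramMapCombined = paramMap ++ paramMap2
        val model2 = lr.fit(training, paramMapCombined)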
@@ -700,63 +700,63 @@ Refer to the [`Estimator` Python 
docs](api/python/pyspark.ml.html#pyspark.ml.Est
 the [`Transformer` Python 
docs](api/python/pyspark.ml.html#pyspark.ml.Transformer) and
 the [`Params` Python docs](api/python/pyspark.ml.html#pyspark.ml.param.Params) 
for more details on the API.
 
-<div class="highlight"><pre><span class="kn">from</span> <span 
class="nn">pyspark.ml.linalg</span> <span class="kn">import</span> <span 
class="n">Vectors</span>
+<div class="highlight"><pre><span></span><span class="kn">from</span> <span 
class="nn">pyspark.ml.linalg</span> <span class="kn">import</span> <span 
class="n">Vectors</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> 
<span class="kn">import</span> <span class="n">LogisticRegression</span>
 
-<span class="c"># Prepare training data from a list of (label, features) 
tuples.</span>
+<span class="c1"># Prepare training data from a list of (label, features) 
tuples.</span>
 <span class="n">training</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span 
class="n">createDataFrame</span><span class="p">([</span>
     <span class="p">(</span><span class="mf">1.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="mf">1.1</span><span class="p">,</span> <span 
class="mf">0.1</span><span class="p">])),</span>
     <span class="p">(</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">2.0</span><span 
class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span 
class="o">-</span><span class="mf">1.0</span><span class="p">])),</span>
     <span class="p">(</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">2.0</span><span 
class="p">,</span> <span class="mf">1.3</span><span class="p">,</span> <span 
class="mf">1.0</span><span class="p">])),</span>
-    <span class="p">(</span><span class="mf">1.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="mf">1.2</span><span class="p">,</span> <span 
class="o">-</span><span class="mf">0.5</span><span class="p">]))],</span> <span 
class="p">[</span><span class="s">&quot;label&quot;</span><span 
class="p">,</span> <span class="s">&quot;features&quot;</span><span 
class="p">])</span>
+    <span class="p">(</span><span class="mf">1.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="mf">1.2</span><span class="p">,</span> <span 
class="o">-</span><span class="mf">0.5</span><span class="p">]))],</span> <span 
class="p">[</span><span class="s2">&quot;label&quot;</span><span 
class="p">,</span> <span class="s2">&quot;features&quot;</span><span 
class="p">])</span>
 
-<span class="c"># Create a LogisticRegression instance. This instance is an 
Estimator.</span>
+<span class="c1"># Create a LogisticRegression instance. This instance is an 
Estimator.</span>
 <span class="n">lr</span> <span class="o">=</span> <span 
class="n">LogisticRegression</span><span class="p">(</span><span 
class="n">maxIter</span><span class="o">=</span><span class="mi">10</span><span 
class="p">,</span> <span class="n">regParam</span><span class="o">=</span><span 
class="mf">0.01</span><span class="p">)</span>
-<span class="c"># Print out the parameters, documentation, and any default 
values.</span>
-<span class="k">print</span><span class="p">(</span><span 
class="s">&quot;LogisticRegression parameters:</span><span 
class="se">\n</span><span class="s">&quot;</span> <span class="o">+</span> 
<span class="n">lr</span><span class="o">.</span><span 
class="n">explainParams</span><span class="p">()</span> <span 
class="o">+</span> <span class="s">&quot;</span><span class="se">\n</span><span 
class="s">&quot;</span><span class="p">)</span>
+<span class="c1"># Print out the parameters, documentation, and any default 
values.</span>
+<span class="k">print</span><span class="p">(</span><span 
class="s2">&quot;LogisticRegression parameters:</span><span 
class="se">\n</span><span class="s2">&quot;</span> <span class="o">+</span> 
<span class="n">lr</span><span class="o">.</span><span 
class="n">explainParams</span><span class="p">()</span> <span 
class="o">+</span> <span class="s2">&quot;</span><span 
class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
 
-<span class="c"># Learn a LogisticRegression model. This uses the parameters 
stored in lr.</span>
+<span class="c1"># Learn a LogisticRegression model. This uses the parameters 
stored in lr.</span>
 <span class="n">model1</span> <span class="o">=</span> <span 
class="n">lr</span><span class="o">.</span><span class="n">fit</span><span 
class="p">(</span><span class="n">training</span><span class="p">)</span>
 
-<span class="c"># Since model1 is a Model (i.e., a transformer produced by an 
Estimator),</span>
-<span class="c"># we can view the parameters it used during fit().</span>
-<span class="c"># This prints the parameter (name: value) pairs, where names 
are unique IDs for this</span>
-<span class="c"># LogisticRegression instance.</span>
-<span class="k">print</span><span class="p">(</span><span 
class="s">&quot;Model 1 was fit using parameters: &quot;</span><span 
class="p">)</span>
+<span class="c1"># Since model1 is a Model (i.e., a transformer produced by an 
Estimator),</span>
+<span class="c1"># we can view the parameters it used during fit().</span>
+<span class="c1"># This prints the parameter (name: value) pairs, where names 
are unique IDs for this</span>
+<span class="c1"># LogisticRegression instance.</span>
+<span class="k">print</span><span class="p">(</span><span 
class="s2">&quot;Model 1 was fit using parameters: &quot;</span><span 
class="p">)</span>
 <span class="k">print</span><span class="p">(</span><span 
class="n">model1</span><span class="o">.</span><span 
class="n">extractParamMap</span><span class="p">())</span>
 
-<span class="c"># We may alternatively specify parameters using a Python 
dictionary as a paramMap</span>
+<span class="c1"># We may alternatively specify parameters using a Python 
dictionary as a paramMap</span>
 <span class="n">paramMap</span> <span class="o">=</span> <span 
class="p">{</span><span class="n">lr</span><span class="o">.</span><span 
class="n">maxIter</span><span class="p">:</span> <span 
class="mi">20</span><span class="p">}</span>
-<span class="n">paramMap</span><span class="p">[</span><span 
class="n">lr</span><span class="o">.</span><span class="n">maxIter</span><span 
class="p">]</span> <span class="o">=</span> <span class="mi">30</span>  <span 
class="c"># Specify 1 Param, overwriting the original maxIter.</span>
-<span class="n">paramMap</span><span class="o">.</span><span 
class="n">update</span><span class="p">({</span><span class="n">lr</span><span 
class="o">.</span><span class="n">regParam</span><span class="p">:</span> <span 
class="mf">0.1</span><span class="p">,</span> <span class="n">lr</span><span 
class="o">.</span><span class="n">threshold</span><span class="p">:</span> 
<span class="mf">0.55</span><span class="p">})</span>  <span class="c"># 
Specify multiple Params.</span>
+<span class="n">paramMap</span><span class="p">[</span><span 
class="n">lr</span><span class="o">.</span><span class="n">maxIter</span><span 
class="p">]</span> <span class="o">=</span> <span class="mi">30</span>  <span 
class="c1"># Specify 1 Param, overwriting the original maxIter.</span>
+<span class="n">paramMap</span><span class="o">.</span><span 
class="n">update</span><span class="p">({</span><span class="n">lr</span><span 
class="o">.</span><span class="n">regParam</span><span class="p">:</span> <span 
class="mf">0.1</span><span class="p">,</span> <span class="n">lr</span><span 
class="o">.</span><span class="n">threshold</span><span class="p">:</span> 
<span class="mf">0.55</span><span class="p">})</span>  <span class="c1"># 
Specify multiple Params.</span>
 
-<span class="c"># You can combine paramMaps, which are python 
dictionaries.</span>
-<span class="n">paramMap2</span> <span class="o">=</span> <span 
class="p">{</span><span class="n">lr</span><span class="o">.</span><span 
class="n">probabilityCol</span><span class="p">:</span> <span 
class="s">&quot;myProbability&quot;</span><span class="p">}</span>  <span 
class="c"># Change output column name</span>
+<span class="c1"># You can combine paramMaps, which are python 
dictionaries.</span>
+<span class="n">paramMap2</span> <span class="o">=</span> <span 
class="p">{</span><span class="n">lr</span><span class="o">.</span><span 
class="n">probabilityCol</span><span class="p">:</span> <span 
class="s2">&quot;myProbability&quot;</span><span class="p">}</span>  <span 
class="c1"># Change output column name</span>
 <span class="n">paramMapCombined</span> <span class="o">=</span> <span 
class="n">paramMap</span><span class="o">.</span><span 
class="n">copy</span><span class="p">()</span>
 <span class="n">paramMapCombined</span><span class="o">.</span><span 
class="n">update</span><span class="p">(</span><span 
class="n">paramMap2</span><span class="p">)</span>
 
-<span class="c"># Now learn a new model using the paramMapCombined 
parameters.</span>
-<span class="c"># paramMapCombined overrides all parameters set earlier via 
lr.set* methods.</span>
+<span class="c1"># Now learn a new model using the paramMapCombined 
parameters.</span>
+<span class="c1"># paramMapCombined overrides all parameters set earlier via 
lr.set* methods.</span>
 <span class="n">model2</span> <span class="o">=</span> <span 
class="n">lr</span><span class="o">.</span><span class="n">fit</span><span 
class="p">(</span><span class="n">training</span><span class="p">,</span> <span 
class="n">paramMapCombined</span><span class="p">)</span>
-<span class="k">print</span><span class="p">(</span><span 
class="s">&quot;Model 2 was fit using parameters: &quot;</span><span 
class="p">)</span>
+<span class="k">print</span><span class="p">(</span><span 
class="s2">&quot;Model 2 was fit using parameters: &quot;</span><span 
class="p">)</span>
 <span class="k">print</span><span class="p">(</span><span 
class="n">model2</span><span class="o">.</span><span 
class="n">extractParamMap</span><span class="p">())</span>
 
-<span class="c"># Prepare test data</span>
+<span class="c1"># Prepare test data</span>
 <span class="n">test</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span 
class="n">createDataFrame</span><span class="p">([</span>
     <span class="p">(</span><span class="mf">1.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="o">-</span><span 
class="mf">1.0</span><span class="p">,</span> <span class="mf">1.5</span><span 
class="p">,</span> <span class="mf">1.3</span><span class="p">])),</span>
     <span class="p">(</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">3.0</span><span 
class="p">,</span> <span class="mf">2.0</span><span class="p">,</span> <span 
class="o">-</span><span class="mf">0.1</span><span class="p">])),</span>
-    <span class="p">(</span><span class="mf">1.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="mf">2.2</span><span class="p">,</span> <span 
class="o">-</span><span class="mf">1.5</span><span class="p">]))],</span> <span 
class="p">[</span><span class="s">&quot;label&quot;</span><span 
class="p">,</span> <span class="s">&quot;features&quot;</span><span 
class="p">])</span>
+    <span class="p">(</span><span class="mf">1.0</span><span 
class="p">,</span> <span class="n">Vectors</span><span class="o">.</span><span 
class="n">dense</span><span class="p">([</span><span class="mf">0.0</span><span 
class="p">,</span> <span class="mf">2.2</span><span class="p">,</span> <span 
class="o">-</span><span class="mf">1.5</span><span class="p">]))],</span> <span 
class="p">[</span><span class="s2">&quot;label&quot;</span><span 
class="p">,</span> <span class="s2">&quot;features&quot;</span><span 
class="p">])</span>
 
-<span class="c"># Make predictions on test data using the 
Transformer.transform() method.</span>
-<span class="c"># LogisticRegression.transform will only use the 
&#39;features&#39; column.</span>
-<span class="c"># Note that model2.transform() outputs a 
&quot;myProbability&quot; column instead of the usual</span>
-<span class="c"># &#39;probability&#39; column since we renamed the 
lr.probabilityCol parameter previously.</span>
+<span class="c1"># Make predictions on test data using the 
Transformer.transform() method.</span>
+<span class="c1"># LogisticRegression.transform will only use the 
&#39;features&#39; column.</span>
+<span class="c1"># Note that model2.transform() outputs a 
&quot;myProbability&quot; column instead of the usual</span>
+<span class="c1"># &#39;probability&#39; column since we renamed the 
lr.probabilityCol parameter previously.</span>
 <span class="n">prediction</span> <span class="o">=</span> <span 
class="n">model2</span><span class="o">.</span><span 
class="n">transform</span><span class="p">(</span><span 
class="n">test</span><span class="p">)</span>
-<span class="n">result</span> <span class="o">=</span> <span 
class="n">prediction</span><span class="o">.</span><span 
class="n">select</span><span class="p">(</span><span 
class="s">&quot;features&quot;</span><span class="p">,</span> <span 
class="s">&quot;label&quot;</span><span class="p">,</span> <span 
class="s">&quot;myProbability&quot;</span><span class="p">,</span> <span 
class="s">&quot;prediction&quot;</span><span class="p">)</span> \
+<span class="n">result</span> <span class="o">=</span> <span 
class="n">prediction</span><span class="o">.</span><span 
class="n">select</span><span class="p">(</span><span 
class="s2">&quot;features&quot;</span><span class="p">,</span> <span 
class="s2">&quot;label&quot;</span><span class="p">,</span> <span 
class="s2">&quot;myProbability&quot;</span><span class="p">,</span> <span 
class="s2">&quot;prediction&quot;</span><span class="p">)</span> \
     <span class="o">.</span><span class="n">collect</span><span 
class="p">()</span>
 
 <span class="k">for</span> <span class="n">row</span> <span 
class="ow">in</span> <span class="n">result</span><span class="p">:</span>
-    <span class="k">print</span><span class="p">(</span><span 
class="s">&quot;features=</span><span class="si">%s</span><span class="s">, 
label=</span><span class="si">%s</span><span class="s"> -&gt; prob=</span><span 
class="si">%s</span><span class="s">, prediction=</span><span 
class="si">%s</span><span class="s">&quot;</span>
+    <span class="k">print</span><span class="p">(</span><span 
class="s2">&quot;features=</span><span class="si">%s</span><span class="s2">, 
label=</span><span class="si">%s</span><span class="s2"> -&gt; 
prob=</span><span class="si">%s</span><span class="s2">, 
prediction=</span><span class="si">%s</span><span class="s2">&quot;</span>
           <span class="o">%</span> <span class="p">(</span><span 
class="n">row</span><span class="o">.</span><span 
class="n">features</span><span class="p">,</span> <span 
class="n">row</span><span class="o">.</span><span class="n">label</span><span 
class="p">,</span> <span class="n">row</span><span class="o">.</span><span 
class="n">myProbability</span><span class="p">,</span> <span 
class="n">row</span><span class="o">.</span><span 
class="n">prediction</span><span class="p">))</span>
 </pre></div><div><small>Find full example code at 
"examples/src/main/python/ml/estimator_transformer_param_example.py" in the 
Spark repo.</small></div>
 </div>
@@ -773,7 +773,7 @@ the [`Params` Python 
docs](api/python/pyspark.ml.html#pyspark.ml.param.Params) f
 
 Refer to the [`Pipeline` Scala 
docs](api/scala/index.html#org.apache.spark.ml.Pipeline) for details on the API.
 
-<div class="highlight"><pre><span class="k">import</span> <span 
class="nn">org.apache.spark.ml.</span><span class="o">{</span><span 
class="nc">Pipeline</span><span class="o">,</span> <span 
class="nc">PipelineModel</span><span class="o">}</span>
+<div class="highlight"><pre><span></span><span class="k">import</span> <span 
class="nn">org.apache.spark.ml.</span><span class="o">{</span><span 
class="nc">Pipeline</span><span class="o">,</span> <span 
class="nc">PipelineModel</span><span class="o">}</span>
 <span class="k">import</span> <span 
class="nn">org.apache.spark.ml.classification.LogisticRegression</span>
 <span class="k">import</span> <span 
class="nn">org.apache.spark.ml.feature.</span><span class="o">{</span><span 
class="nc">HashingTF</span><span class="o">,</span> <span 
class="nc">Tokenizer</span><span class="o">}</span>
 <span class="k">import</span> <span 
class="nn">org.apache.spark.ml.linalg.Vector</span>
@@ -826,7 +826,7 @@ Refer to the [`Pipeline` Scala 
docs](api/scala/index.html#org.apache.spark.ml.Pi
   <span class="o">.</span><span class="n">select</span><span 
class="o">(</span><span class="s">&quot;id&quot;</span><span class="o">,</span> 
<span class="s">&quot;text&quot;</span><span class="o">,</span> <span 
class="s">&quot;probability&quot;</span><span class="o">,</span> <span 
class="s">&quot;prediction&quot;</span><span class="o">)</span>
   <span class="o">.</span><span class="n">collect</span><span 
class="o">()</span>
   <span class="o">.</span><span class="n">foreach</span> <span 
class="o">{</span> <span class="k">case</span> <span class="nc">Row</span><span 
class="o">(</span><span class="n">id</span><span class="k">:</span> <span 
class="kt">Long</span><span class="o">,</span> <span class="n">text</span><span 
class="k">:</span> <span class="kt">String</span><span class="o">,</span> <span 
class="n">prob</span><span class="k">:</span> <span 
class="kt">Vector</span><span class="o">,</span> <span 
class="n">prediction</span><span class="k">:</span> <span 
class="kt">Double</span><span class="o">)</span> <span class="k">=&gt;</span>
-    <span class="n">println</span><span class="o">(</span><span 
class="n">s</span><span class="s">&quot;($id, $text) --&gt; prob=$prob, 
prediction=$prediction&quot;</span><span class="o">)</span>
+    <span class="n">println</span><span class="o">(</span><span 
class="s">s&quot;(</span><span class="si">$id</span><span class="s">, 
</span><span class="si">$text</span><span class="s">) --&gt; prob=</span><span 
class="si">$prob</span><span class="s">, prediction=</span><span 
class="si">$prediction</span><span class="s">&quot;</span><span 
class="o">)</span>
   <span class="o">}</span>
 </pre></div><div><small>Find full example code at 
"examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala" in 
the Spark repo.</small></div>
 </div>
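
    (This hunk re-renders the Scala tab of the Pipeline example, PipelineExample.scala. Minus the markup, the highlighted code is approximately the following, with the training and test DataFrames prepared as in the full example in the Spark repo:)

        import org.apache.spark.ml.{Pipeline, PipelineModel}
        import org.apache.spark.ml.classification.LogisticRegression
        import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
        import org.apache.spark.ml.linalg.Vector
        import org.apache.spark.sql.Row

        // Configure an ML pipeline with three stages: tokenizer, hashingTF, and lr.
        val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
        val hashingTF = new HashingTF()
          .setNumFeatures(1000)
          .setInputCol(tokenizer.getOutputCol)
          .setOutputCol("features")
        val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.001)
        val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

        // Fit the pipeline to the training documents, then score the test documents.
        val model = pipeline.fit(training)
        model.transform(test)
          .select("id", "text", "probability", "prediction")
          .collect()
          .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
            println(s"($id, $text) --> prob=$prob, prediction=$prediction")
          }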
@@ -836,7 +836,7 @@ Refer to the [`Pipeline` Scala 
docs](api/scala/index.html#org.apache.spark.ml.Pi
 
 Refer to the [`Pipeline` Java 
docs](api/java/org/apache/spark/ml/Pipeline.html) for details on the API.
 
-<div class="highlight"><pre><span class="kn">import</span> <span 
class="nn">java.util.Arrays</span><span class="o">;</span>
+<div class="highlight"><pre><span></span><span class="kn">import</span> <span 
class="nn">java.util.Arrays</span><span class="o">;</span>
 
 <span class="kn">import</span> <span 
class="nn">org.apache.spark.ml.Pipeline</span><span class="o">;</span>
 <span class="kn">import</span> <span 
class="nn">org.apache.spark.ml.PipelineModel</span><span class="o">;</span>
@@ -849,24 +849,24 @@ Refer to the [`Pipeline` Java 
docs](api/java/org/apache/spark/ml/Pipeline.html)
 
 <span class="c1">// Prepare training documents, which are labeled.</span>
 <span class="n">Dataset</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">training</span> 
<span class="o">=</span> <span class="n">spark</span><span 
class="o">.</span><span class="na">createDataFrame</span><span 
class="o">(</span><span class="n">Arrays</span><span class="o">.</span><span 
class="na">asList</span><span class="o">(</span>
-  <span class="k">new</span> <span class="nf">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span 
class="s">&quot;a b c d e spark&quot;</span><span class="o">,</span> <span 
class="mf">1.0</span><span class="o">),</span>
-  <span class="k">new</span> <span class="nf">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">1L</span><span class="o">,</span> <span 
class="s">&quot;b d&quot;</span><span class="o">,</span> <span 
class="mf">0.0</span><span class="o">),</span>
-  <span class="k">new</span> <span class="nf">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">2L</span><span class="o">,</span> <span 
class="s">&quot;spark f g h&quot;</span><span class="o">,</span> <span 
class="mf">1.0</span><span class="o">),</span>
-  <span class="k">new</span> <span class="nf">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">3L</span><span class="o">,</span> <span 
class="s">&quot;hadoop mapreduce&quot;</span><span class="o">,</span> <span 
class="mf">0.0</span><span class="o">)</span>
+  <span class="k">new</span> <span class="n">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">0</span><span class="n">L</span><span 
class="o">,</span> <span class="s">&quot;a b c d e spark&quot;</span><span 
class="o">,</span> <span class="mf">1.0</span><span class="o">),</span>
+  <span class="k">new</span> <span class="n">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">1L</span><span class="o">,</span> <span 
class="s">&quot;b d&quot;</span><span class="o">,</span> <span 
class="mf">0.0</span><span class="o">),</span>
+  <span class="k">new</span> <span class="n">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">2L</span><span class="o">,</span> <span 
class="s">&quot;spark f g h&quot;</span><span class="o">,</span> <span 
class="mf">1.0</span><span class="o">),</span>
+  <span class="k">new</span> <span class="n">JavaLabeledDocument</span><span 
class="o">(</span><span class="mi">3L</span><span class="o">,</span> <span 
class="s">&quot;hadoop mapreduce&quot;</span><span class="o">,</span> <span 
class="mf">0.0</span><span class="o">)</span>
 <span class="o">),</span> <span class="n">JavaLabeledDocument</span><span 
class="o">.</span><span class="na">class</span><span class="o">);</span>
 
 <span class="c1">// Configure an ML pipeline, which consists of three stages: 
tokenizer, hashingTF, and lr.</span>
-<span class="n">Tokenizer</span> <span class="n">tokenizer</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">Tokenizer</span><span class="o">()</span>
+<span class="n">Tokenizer</span> <span class="n">tokenizer</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">Tokenizer</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setInputCol</span><span 
class="o">(</span><span class="s">&quot;text&quot;</span><span 
class="o">)</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span 
class="o">(</span><span class="s">&quot;words&quot;</span><span 
class="o">);</span>
-<span class="n">HashingTF</span> <span class="n">hashingTF</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">HashingTF</span><span class="o">()</span>
+<span class="n">HashingTF</span> <span class="n">hashingTF</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">HashingTF</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setNumFeatures</span><span 
class="o">(</span><span class="mi">1000</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setInputCol</span><span 
class="o">(</span><span class="n">tokenizer</span><span class="o">.</span><span 
class="na">getOutputCol</span><span class="o">())</span>
   <span class="o">.</span><span class="na">setOutputCol</span><span 
class="o">(</span><span class="s">&quot;features&quot;</span><span 
class="o">);</span>
-<span class="n">LogisticRegression</span> <span class="n">lr</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">LogisticRegression</span><span class="o">()</span>
+<span class="n">LogisticRegression</span> <span class="n">lr</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">LogisticRegression</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setMaxIter</span><span 
class="o">(</span><span class="mi">10</span><span class="o">)</span>
   <span class="o">.</span><span class="na">setRegParam</span><span 
class="o">(</span><span class="mf">0.001</span><span class="o">);</span>
-<span class="n">Pipeline</span> <span class="n">pipeline</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">Pipeline</span><span class="o">()</span>
+<span class="n">Pipeline</span> <span class="n">pipeline</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="n">Pipeline</span><span class="o">()</span>
   <span class="o">.</span><span class="na">setStages</span><span 
class="o">(</span><span class="k">new</span> <span 
class="n">PipelineStage</span><span class="o">[]</span> <span 
class="o">{</span><span class="n">tokenizer</span><span class="o">,</span> 
<span class="n">hashingTF</span><span class="o">,</span> <span 
class="n">lr</span><span class="o">});</span>
 
 <span class="c1">// Fit the pipeline to training documents.</span>
@@ -874,10 +874,10 @@ Refer to the [`Pipeline` Java 
docs](api/java/org/apache/spark/ml/Pipeline.html)
 
 <span class="c1">// Prepare test documents, which are unlabeled.</span>
 <span class="n">Dataset</span><span class="o">&lt;</span><span 
class="n">Row</span><span class="o">&gt;</span> <span class="n">test</span> 
<span class="o">=</span> <span class="n">spark</span><span 
class="o">.</span><span class="na">createDataFrame</span><span 
class="o">(</span><span class="n">Arrays</span><span class="o">.</span><span 
class="na">asList</span><span class="o">(</span>
-  <span class="k">new</span> <span class="nf">JavaDocument</span><span 
class="o">(</span><span class="mi">4L</span><span class="o">,</span> <span 
class="s">&quot;spark i j k&quot;</span><span class="o">),</span>
-  <span class="k">new</span> <span class="nf">JavaDocument</span><span 
class="o">(</span><span class="mi">5L</span><span class="o">,</span> <span 
class="s">&quot;l m n&quot;</span><span class="o">),</span>
-  <span class="k">new</span> <span class="nf">JavaDocument</span><span 
class="o">(</span><span class="mi">6L</span><span class="o">,</span> <span 
class="s">&quot;spark hadoop spark&quot;</span><span class="o">),</span>
-  <span class="k">new</span> <span class="nf">JavaDocument</span><span 
class="o">(</span><span class="mi">7L</span><span class="o">,</span> <span 
class="s">&quot;apache hadoop&quot;</span><span class="o">)</span>
+  <span class="k">new</span> <span class="n">JavaDocument</span><span 
class="o">(</span><span class="mi">4L</span><span class="o">,</span> <span 
class="s">&quot;spark i j k&quot;</span><span class="o">),</span>
+  <span class="k">new</span> <span class="n">JavaDocument</span><span 
class="o">(</span><span class="mi">5L</span><span class="o">,</span> <span 
class="s">&quot;l m n&quot;</span><span class="o">),</span>
+  <span class="k">new</span> <span class="n">JavaDocument</span><span 
class="o">(</span><span class="mi">6L</span><span class="o">,</span> <span 
class="s">&quot;spark hadoop spark&quot;</span><span class="o">),</span>
+  <span class="k">new</span> <span class="n">JavaDocument</span><span 
class="o">(</span><span class="mi">7L</span><span class="o">,</span> <span 
class="s">&quot;apache hadoop&quot;</span><span class="o">)</span>
 <span class="o">),</span> <span class="n">JavaDocument</span><span 
class="o">.</span><span class="na">class</span><span class="o">);</span>
 
 <span class="c1">// Make predictions on test documents.</span>
@@ -893,41 +893,41 @@ Refer to the [`Pipeline` Java 
docs](api/java/org/apache/spark/ml/Pipeline.html)
 
 Refer to the [`Pipeline` Python 
docs](api/python/pyspark.ml.html#pyspark.ml.Pipeline) for more details on the 
API.
 
-<div class="highlight"><pre><span class="kn">from</span> <span 
class="nn">pyspark.ml</span> <span class="kn">import</span> <span 
class="n">Pipeline</span>
+<div class="highlight"><pre><span></span><span class="kn">from</span> <span 
class="nn">pyspark.ml</span> <span class="kn">import</span> <span 
class="n">Pipeline</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> 
<span class="kn">import</span> <span class="n">LogisticRegression</span>
 <span class="kn">from</span> <span class="nn">pyspark.ml.feature</span> <span 
class="kn">import</span> <span class="n">HashingTF</span><span 
class="p">,</span> <span class="n">Tokenizer</span>
 
-<span class="c"># Prepare training documents from a list of (id, text, label) 
tuples.</span>
+<span class="c1"># Prepare training documents from a list of (id, text, label) 
tuples.</span>
 <span class="n">training</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span 
class="n">createDataFrame</span><span class="p">([</span>
-    <span class="p">(</span><span class="mi">0</span><span class="p">,</span> 
<span class="s">&quot;a b c d e spark&quot;</span><span class="p">,</span> 
<span class="mf">1.0</span><span class="p">),</span>
-    <span class="p">(</span><span class="mi">1</span><span class="p">,</span> 
<span class="s">&quot;b d&quot;</span><span class="p">,</span> <span 
class="mf">0.0</span><span class="p">),</span>
-    <span class="p">(</span><span class="mi">2</span><span class="p">,</span> 
<span class="s">&quot;spark f g h&quot;</span><span class="p">,</span> <span 
class="mf">1.0</span><span class="p">),</span>
-    <span class="p">(</span><span class="mi">3</span><span class="p">,</span> 
<span class="s">&quot;hadoop mapreduce&quot;</span><span class="p">,</span> 
<span class="mf">0.0</span><span class="p">)</span>
-<span class="p">],</span> <span class="p">[</span><span 
class="s">&quot;id&quot;</span><span class="p">,</span> <span 
class="s">&quot;text&quot;</span><span class="p">,</span> <span 
class="s">&quot;label&quot;</span><span class="p">])</span>
-
-<span class="c"># Configure an ML pipeline, which consists of three stages: 
tokenizer, hashingTF, and lr.</span>
-<span class="n">tokenizer</span> <span class="o">=</span> <span 
class="n">Tokenizer</span><span class="p">(</span><span 
class="n">inputCol</span><span class="o">=</span><span 
class="s">&quot;text&quot;</span><span class="p">,</span> <span 
class="n">outputCol</span><span class="o">=</span><span 
class="s">&quot;words&quot;</span><span class="p">)</span>
-<span class="n">hashingTF</span> <span class="o">=</span> <span 
class="n">HashingTF</span><span class="p">(</span><span 
class="n">inputCol</span><span class="o">=</span><span 
class="n">tokenizer</span><span class="o">.</span><span 
class="n">getOutputCol</span><span class="p">(),</span> <span 
class="n">outputCol</span><span class="o">=</span><span 
class="s">&quot;features&quot;</span><span class="p">)</span>
+    <span class="p">(</span><span class="mi">0</span><span class="p">,</span> 
<span class="s2">&quot;a b c d e spark&quot;</span><span class="p">,</span> 
<span class="mf">1.0</span><span class="p">),</span>
+    <span class="p">(</span><span class="mi">1</span><span class="p">,</span> 
<span class="s2">&quot;b d&quot;</span><span class="p">,</span> <span 
class="mf">0.0</span><span class="p">),</span>
+    <span class="p">(</span><span class="mi">2</span><span class="p">,</span> 
<span class="s2">&quot;spark f g h&quot;</span><span class="p">,</span> <span 
class="mf">1.0</span><span class="p">),</span>
+    <span class="p">(</span><span class="mi">3</span><span class="p">,</span> 
<span class="s2">&quot;hadoop mapreduce&quot;</span><span class="p">,</span> 
<span class="mf">0.0</span><span class="p">)</span>
+<span class="p">],</span> <span class="p">[</span><span 
class="s2">&quot;id&quot;</span><span class="p">,</span> <span 
class="s2">&quot;text&quot;</span><span class="p">,</span> <span 
class="s2">&quot;label&quot;</span><span class="p">])</span>
+
+<span class="c1"># Configure an ML pipeline, which consists of three stages: 
tokenizer, hashingTF, and lr.</span>
+<span class="n">tokenizer</span> <span class="o">=</span> <span 
class="n">Tokenizer</span><span class="p">(</span><span 
class="n">inputCol</span><span class="o">=</span><span 
class="s2">&quot;text&quot;</span><span class="p">,</span> <span 
class="n">outputCol</span><span class="o">=</span><span 
class="s2">&quot;words&quot;</span><span class="p">)</span>
+<span class="n">hashingTF</span> <span class="o">=</span> <span 
class="n">HashingTF</span><span class="p">(</span><span 
class="n">inputCol</span><span class="o">=</span><span 
class="n">tokenizer</span><span class="o">.</span><span 
class="n">getOutputCol</span><span class="p">(),</span> <span 
class="n">outputCol</span><span class="o">=</span><span 
class="s2">&quot;features&quot;</span><span class="p">)</span>
 <span class="n">lr</span> <span class="o">=</span> <span 
class="n">LogisticRegression</span><span class="p">(</span><span 
class="n">maxIter</span><span class="o">=</span><span class="mi">10</span><span 
class="p">,</span> <span class="n">regParam</span><span class="o">=</span><span 
class="mf">0.001</span><span class="p">)</span>
 <span class="n">pipeline</span> <span class="o">=</span> <span 
class="n">Pipeline</span><span class="p">(</span><span 
class="n">stages</span><span class="o">=</span><span class="p">[</span><span 
class="n">tokenizer</span><span class="p">,</span> <span 
class="n">hashingTF</span><span class="p">,</span> <span 
class="n">lr</span><span class="p">])</span>
 
-<span class="c"># Fit the pipeline to training documents.</span>
+<span class="c1"># Fit the pipeline to training documents.</span>
 <span class="n">model</span> <span class="o">=</span> <span 
class="n">pipeline</span><span class="o">.</span><span 
class="n">fit</span><span class="p">(</span><span 
class="n">training</span><span class="p">)</span>
 
-<span class="c"># Prepare test documents, which are unlabeled (id, text) 
tuples.</span>
+<span class="c1"># Prepare test documents, which are unlabeled (id, text) 
tuples.</span>
 <span class="n">test</span> <span class="o">=</span> <span 
class="n">spark</span><span class="o">.</span><span 
class="n">createDataFrame</span><span class="p">([</span>
-    <span class="p">(</span><span class="mi">4</span><span class="p">,</span> 
<span class="s">&quot;spark i j k&quot;</span><span class="p">),</span>
-    <span class="p">(</span><span class="mi">5</span><span class="p">,</span> 
<span class="s">&quot;l m n&quot;</span><span class="p">),</span>
-    <span class="p">(</span><span class="mi">6</span><span class="p">,</span> 
<span class="s">&quot;spark hadoop spark&quot;</span><span class="p">),</span>
-    <span class="p">(</span><span class="mi">7</span><span class="p">,</span> 
<span class="s">&quot;apache hadoop&quot;</span><span class="p">)</span>
-<span class="p">],</span> <span class="p">[</span><span 
class="s">&quot;id&quot;</span><span class="p">,</span> <span 
class="s">&quot;text&quot;</span><span class="p">])</span>
+    <span class="p">(</span><span class="mi">4</span><span class="p">,</span> 
<span class="s2">&quot;spark i j k&quot;</span><span class="p">),</span>
+    <span class="p">(</span><span class="mi">5</span><span class="p">,</span> 
<span class="s2">&quot;l m n&quot;</span><span class="p">),</span>
+    <span class="p">(</span><span class="mi">6</span><span class="p">,</span> 
<span class="s2">&quot;spark hadoop spark&quot;</span><span class="p">),</span>
+    <span class="p">(</span><span class="mi">7</span><span class="p">,</span> 
<span class="s2">&quot;apache hadoop&quot;</span><span class="p">)</span>
+<span class="p">],</span> <span class="p">[</span><span 
class="s2">&quot;id&quot;</span><span class="p">,</span> <span 
class="s2">&quot;text&quot;</span><span class="p">])</span>
 
-<span class="c"># Make predictions on test documents and print columns of 
interest.</span>
+<span class="c1"># Make predictions on test documents and print columns of 
interest.</span>
 <span class="n">prediction</span> <span class="o">=</span> <span 
class="n">model</span><span class="o">.</span><span 
class="n">transform</span><span class="p">(</span><span 
class="n">test</span><span class="p">)</span>
-<span class="n">selected</span> <span class="o">=</span> <span 
class="n">prediction</span><span class="o">.</span><span 
class="n">select</span><span class="p">(</span><span 
class="s">&quot;id&quot;</span><span class="p">,</span> <span 
class="s">&quot;text&quot;</span><span class="p">,</span> <span 
class="s">&quot;probability&quot;</span><span class="p">,</span> <span 
class="s">&quot;prediction&quot;</span><span class="p">)</span>
+<span class="n">selected</span> <span class="o">=</span> <span 
class="n">prediction</span><span class="o">.</span><span 
class="n">select</span><span class="p">(</span><span 
class="s2">&quot;id&quot;</span><span class="p">,</span> <span 
class="s2">&quot;text&quot;</span><span class="p">,</span> <span 
class="s2">&quot;probability&quot;</span><span class="p">,</span> <span 
class="s2">&quot;prediction&quot;</span><span class="p">)</span>
 <span class="k">for</span> <span class="n">row</span> <span 
class="ow">in</span> <span class="n">selected</span><span 
class="o">.</span><span class="n">collect</span><span class="p">():</span>
     <span class="n">rid</span><span class="p">,</span> <span 
class="n">text</span><span class="p">,</span> <span class="n">prob</span><span 
class="p">,</span> <span class="n">prediction</span> <span class="o">=</span> 
<span class="n">row</span>
-    <span class="k">print</span><span class="p">(</span><span 
class="s">&quot;(</span><span class="si">%d</span><span class="s">, 
</span><span class="si">%s</span><span class="s">) --&gt; prob=</span><span 
class="si">%s</span><span class="s">, prediction=</span><span 
class="si">%f</span><span class="s">&quot;</span> <span class="o">%</span> 
<span class="p">(</span><span class="n">rid</span><span class="p">,</span> 
<span class="n">text</span><span class="p">,</span> <span 
class="nb">str</span><span class="p">(</span><span class="n">prob</span><span 
class="p">),</span> <span class="n">prediction</span><span class="p">))</span>
+    <span class="k">print</span><span class="p">(</span><span 
class="s2">&quot;(</span><span class="si">%d</span><span class="s2">, 
</span><span class="si">%s</span><span class="s2">) --&gt; prob=</span><span 
class="si">%s</span><span class="s2">, prediction=</span><span 
class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> 
<span class="p">(</span><span class="n">rid</span><span class="p">,</span> 
<span class="n">text</span><span class="p">,</span> <span 
class="nb">str</span><span class="p">(</span><span class="n">prob</span><span 
class="p">),</span> <span class="n">prediction</span><span class="p">))</span>
 </pre></div><div><small>Find full example code at 
"examples/src/main/python/ml/pipeline_example.py" in the Spark 
repo.</small></div>
 </div>
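For reference, the Python pipeline example encoded in the highlighted markup above corresponds to roughly the following plain code. This is a reconstruction assembled from the spans, not an authoritative listing; it assumes an active SparkSession bound to `spark`, and the canonical source is "examples/src/main/python/ml/pipeline_example.py" in the Spark repo, as noted above.

    from pyspark.ml import Pipeline
    from pyspark.ml.classification import LogisticRegression
    from pyspark.ml.feature import HashingTF, Tokenizer

    # Prepare training documents from a list of (id, text, label) tuples.
    training = spark.createDataFrame([
        (0, "a b c d e spark", 1.0),
        (1, "b d", 0.0),
        (2, "spark f g h", 1.0),
        (3, "hadoop mapreduce", 0.0)
    ], ["id", "text", "label"])

    # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
    lr = LogisticRegression(maxIter=10, regParam=0.001)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])

    # Fit the pipeline to training documents.
    model = pipeline.fit(training)

    # Prepare test documents, which are unlabeled (id, text) tuples.
    test = spark.createDataFrame([
        (4, "spark i j k"),
        (5, "l m n"),
        (6, "spark hadoop spark"),
        (7, "apache hadoop")
    ], ["id", "text"])

    # Make predictions on test documents and print columns of interest.
    prediction = model.transform(test)
    selected = prediction.select("id", "text", "probability", "prediction")
    for row in selected.collect():
        rid, text, prob, prediction = row
        print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob), prediction))

As written, this fits a Tokenizer -> HashingTF -> LogisticRegression pipeline on the four labeled training rows and prints the id, text, probability vector, and predicted label for each of the four test rows.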
 

