http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/pyspark.mllib.html ---------------------------------------------------------------------- diff --git a/site/docs/2.1.2/api/python/pyspark.mllib.html b/site/docs/2.1.2/api/python/pyspark.mllib.html index 354fa24..53418fa 100644 --- a/site/docs/2.1.2/api/python/pyspark.mllib.html +++ b/site/docs/2.1.2/api/python/pyspark.mllib.html @@ -936,7 +936,7 @@ of points (if < 1.0) of a divisible cluster. <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">array</span><span class="p">([</span><span class="mf">0.0</span><span class="p">,</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">9.0</span><span class="p">,</span><span class="mf">8.0</span><span class="p">,</span> <span class="mf">8.0</span><span class="p">,</span><span class="mf">9.0</span><span class="p">])</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">model</span> <span class="o">=</span> <span class="n">KMeans</span><span class="o">.</span><span class="n">train</span><span class="p">(</span> <span class="gp">... </span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">data</span><span class="p">),</span> <span class="mi">2</span><span class="p">,</span> <span class="n">maxIterations</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">initializationMode</span><span class="o">=</span><span class="s2">"random"</span><span class="p">,</span> -<span class="gp">... </span> <span class="n">seed</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">initializationSteps</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">4</span><span class="p">)</span> +<span class="gp">... </span> <span class="n">seed</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">initializationSteps</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-4</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">]))</span> <span class="o">==</span> <span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">]))</span> <span class="go">True</span> <span class="gp">>>> </span><span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mf">8.0</span><span class="p">,</span> <span class="mf">9.0</span><span class="p">]))</span> <span class="o">==</span> <span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mf">9.0</span><span class="p">,</span> <span class="mf">8.0</span><span class="p">]))</span> @@ -953,7 +953,7 @@ of points (if < 1.0) of a divisible cluster. <span class="gp">... </span> <span class="n">SparseVector</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="p">{</span><span class="mi">2</span><span class="p">:</span> <span class="mf">1.1</span><span class="p">})</span> <span class="gp">... </span><span class="p">]</span> <span class="gp">>>> </span><span class="n">model</span> <span class="o">=</span> <span class="n">KMeans</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">sparse_data</span><span class="p">),</span> <span class="mi">2</span><span class="p">,</span> <span class="n">initializationMode</span><span class="o">=</span><span class="s2">"k-means||"</span><span class="p">,</span> -<span class="gp">... </span> <span class="n">seed</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">initializationSteps</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">4</span><span class="p">)</span> +<span class="gp">... </span> <span class="n">seed</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">initializationSteps</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-4</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">0.</span><span class="p">]))</span> <span class="o">==</span> <span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mf">1.1</span><span class="p">,</span> <span class="mf">0.</span><span class="p">]))</span> <span class="go">True</span> <span class="gp">>>> </span><span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">]))</span> <span class="o">==</span> <span class="n">model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mf">1.1</span><span class="p">]))</span> @@ -1579,25 +1579,18 @@ a gaussian population with constant weights.</p> <li>n_t+1 = n_t * a + m_t</li> </ul> <p>where</p> -<ul> -<li><p class="first">c_t: Centroid at the n_th iteration.</p> -</li> +<ul class="simple"> +<li>c_t: Centroid at the n_th iteration.</li> <li><dl class="first docutils"> <dt>n_t: Number of samples (or) weights associated with the centroid</dt> -<dd><p class="first last">at the n_th iteration.</p> -</dd> +<dd>at the n_th iteration.</dd> </dl> </li> -<li><p class="first">x_t: Centroid of the new data closest to c_t.</p> -</li> -<li><p class="first">m_t: Number of samples (or) weights of the new data closest to c_t</p> -</li> -<li><p class="first">c_t+1: New centroid.</p> -</li> -<li><p class="first">n_t+1: New number of weights.</p> -</li> -<li><p class="first">a: Decay Factor, which gives the forgetfulness.</p> -</li> +<li>x_t: Centroid of the new data closest to c_t.</li> +<li>m_t: Number of samples (or) weights of the new data closest to c_t</li> +<li>c_t+1: New centroid.</li> +<li>n_t+1: New number of weights.</li> +<li>a: Decay Factor, which gives the forgetfulness.</li> </ul> <div class="admonition note"> <p class="first admonition-title">Note</p> @@ -1622,7 +1615,7 @@ forgotten.</p> <span class="gp">>>> </span><span class="n">stkm</span> <span class="o">=</span> <span class="n">StreamingKMeansModel</span><span class="p">(</span><span class="n">initCenters</span><span class="p">,</span> <span class="n">initWeights</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([[</span><span class="o">-</span><span class="mf">0.1</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.1</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">],</span> <span class="gp">... </span> <span class="p">[</span><span class="mf">0.9</span><span class="p">,</span> <span class="mf">0.9</span><span class="p">],</span> <span class="p">[</span><span class="mf">1.1</span><span class="p">,</span> <span class="mf">1.1</span><span class="p">]])</span> -<span class="gp">>>> </span><span class="n">stkm</span> <span class="o">=</span> <span class="n">stkm</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="s2">u"batches"</span><span class="p">)</span> +<span class="gp">>>> </span><span class="n">stkm</span> <span class="o">=</span> <span class="n">stkm</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"batches"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">stkm</span><span class="o">.</span><span class="n">centers</span> <span class="go">array([[ 0., 0.],</span> <span class="go"> [ 1., 1.]])</span> @@ -1634,7 +1627,7 @@ forgotten.</p> <span class="go">[3.0, 3.0]</span> <span class="gp">>>> </span><span class="n">decayFactor</span> <span class="o">=</span> <span class="mf">0.0</span> <span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">DenseVector</span><span class="p">([</span><span class="mf">1.5</span><span class="p">,</span> <span class="mf">1.5</span><span class="p">]),</span> <span class="n">DenseVector</span><span class="p">([</span><span class="mf">0.2</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">])])</span> -<span class="gp">>>> </span><span class="n">stkm</span> <span class="o">=</span> <span class="n">stkm</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="s2">u"batches"</span><span class="p">)</span> +<span class="gp">>>> </span><span class="n">stkm</span> <span class="o">=</span> <span class="n">stkm</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="sa">u</span><span class="s2">"batches"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">stkm</span><span class="o">.</span><span class="n">centers</span> <span class="go">array([[ 0.2, 0.2],</span> <span class="go"> [ 1.5, 1.5]])</span> @@ -2643,7 +2636,7 @@ Compositionality.</p> <p>Querying for synonyms of a word will not return that word:</p> <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">syms</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">findSynonyms</span><span class="p">(</span><span class="s2">"a"</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="p">[</span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">syms</span><span class="p">]</span> -<span class="go">[u'b', u'c']</span> +<span class="go">['b', 'c']</span> </pre></div> </div> <p>But querying for synonyms of a vector may return the word whose @@ -2651,7 +2644,7 @@ representation is that vector:</p> <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">vec</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">syms</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">findSynonyms</span><span class="p">(</span><span class="n">vec</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="p">[</span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">syms</span><span class="p">]</span> -<span class="go">[u'a', u'b']</span> +<span class="go">['a', 'b']</span> </pre></div> </div> <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">os</span><span class="o">,</span> <span class="nn">tempfile</span> @@ -2662,7 +2655,7 @@ representation is that vector:</p> <span class="go">True</span> <span class="gp">>>> </span><span class="n">syms</span> <span class="o">=</span> <span class="n">sameModel</span><span class="o">.</span><span class="n">findSynonyms</span><span class="p">(</span><span class="s2">"a"</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="p">[</span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">syms</span><span class="p">]</span> -<span class="go">[u'b', u'c']</span> +<span class="go">['b', 'c']</span> <span class="gp">>>> </span><span class="kn">from</span> <span class="nn">shutil</span> <span class="k">import</span> <span class="n">rmtree</span> <span class="gp">>>> </span><span class="k">try</span><span class="p">:</span> <span class="gp">... </span> <span class="n">rmtree</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> @@ -3053,7 +3046,7 @@ using the Parallel FP-Growth algorithm.</p> <span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">model</span> <span class="o">=</span> <span class="n">FPGrowth</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">,</span> <span class="mf">0.6</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">freqItemsets</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[FreqItemset(items=[u'a'], freq=4), FreqItemset(items=[u'c'], freq=3), ...</span> +<span class="go">[FreqItemset(items=['a'], freq=4), FreqItemset(items=['c'], freq=3), ...</span> <span class="gp">>>> </span><span class="n">model_path</span> <span class="o">=</span> <span class="n">temp_path</span> <span class="o">+</span> <span class="s2">"/fpm"</span> <span class="gp">>>> </span><span class="n">model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">model_path</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">sameModel</span> <span class="o">=</span> <span class="n">FPGrowthModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">model_path</span><span class="p">)</span> @@ -3151,7 +3144,7 @@ another iteration of distributed prefix growth is run. <span class="gp">>>> </span><span class="n">rdd</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">model</span> <span class="o">=</span> <span class="n">PrefixSpan</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">)</span> <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">freqSequences</span><span class="p">()</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> -<span class="go">[FreqSequence(sequence=[[u'a']], freq=3), FreqSequence(sequence=[[u'a'], [u'a']], freq=1), ...</span> +<span class="go">[FreqSequence(sequence=[['a']], freq=3), FreqSequence(sequence=[['a'], ['a']], freq=1), ...</span> </pre></div> </div> <div class="versionadded"> @@ -4903,7 +4896,7 @@ distribution with the input mean.</p> <dl class="staticmethod"> <dt id="pyspark.mllib.random.RandomRDDs.exponentialVectorRDD"> -<em class="property">static </em><code class="descname">exponentialVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>*a</em>, <em>**kw</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.exponentialVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.exponentialVectorRDD" title="Permalink to this definition">¶</a></dt> +<em class="property">static </em><code class="descname">exponentialVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>mean</em>, <em>numRows</em>, <em>numCols</em>, <em>numPartitions=None</em>, <em>seed=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.exponentialVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.exponentialVectorRDD" title="Permalink to this definition">¶</a></dt> <dd><p>Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Exponential distribution with the input mean.</p> <table class="docutils field-list" frame="void" rules="none"> @@ -4989,7 +4982,7 @@ distribution with the input shape and scale.</p> <dl class="staticmethod"> <dt id="pyspark.mllib.random.RandomRDDs.gammaVectorRDD"> -<em class="property">static </em><code class="descname">gammaVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>*a</em>, <em>**kw</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.gammaVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.gammaVectorRDD" title="Permalink to this definition">¶</a></dt> +<em class="property">static </em><code class="descname">gammaVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>shape</em>, <em>scale</em>, <em>numRows</em>, <em>numCols</em>, <em>numPartitions=None</em>, <em>seed=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.gammaVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.gammaVectorRDD" title="Permalink to this definition">¶</a></dt> <dd><p>Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Gamma distribution.</p> <table class="docutils field-list" frame="void" rules="none"> @@ -5079,7 +5072,7 @@ distribution with the input mean and standard distribution.</p> <dl class="staticmethod"> <dt id="pyspark.mllib.random.RandomRDDs.logNormalVectorRDD"> -<em class="property">static </em><code class="descname">logNormalVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>*a</em>, <em>**kw</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.logNormalVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.logNormalVectorRDD" title="Permalink to this definition">¶</a></dt> +<em class="property">static </em><code class="descname">logNormalVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>mean</em>, <em>std</em>, <em>numRows</em>, <em>numCols</em>, <em>numPartitions=None</em>, <em>seed=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.logNormalVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.logNormalVectorRDD" title="Permalink to this definition">¶</a></dt> <dd><p>Generates an RDD comprised of vectors containing i.i.d. samples drawn from the log normal distribution.</p> <table class="docutils field-list" frame="void" rules="none"> @@ -5165,7 +5158,7 @@ to some other normal N(mean, sigma^2), use <dl class="staticmethod"> <dt id="pyspark.mllib.random.RandomRDDs.normalVectorRDD"> -<em class="property">static </em><code class="descname">normalVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>*a</em>, <em>**kw</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.normalVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.normalVectorRDD" title="Permalink to this definition">¶</a></dt> +<em class="property">static </em><code class="descname">normalVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>numRows</em>, <em>numCols</em>, <em>numPartitions=None</em>, <em>seed=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.normalVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.normalVectorRDD" title="Permalink to this definition">¶</a></dt> <dd><p>Generates an RDD comprised of vectors containing i.i.d. samples drawn from the standard normal distribution.</p> <table class="docutils field-list" frame="void" rules="none"> @@ -5243,7 +5236,7 @@ distribution with the input mean.</p> <dl class="staticmethod"> <dt id="pyspark.mllib.random.RandomRDDs.poissonVectorRDD"> -<em class="property">static </em><code class="descname">poissonVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>*a</em>, <em>**kw</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.poissonVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.poissonVectorRDD" title="Permalink to this definition">¶</a></dt> +<em class="property">static </em><code class="descname">poissonVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>mean</em>, <em>numRows</em>, <em>numCols</em>, <em>numPartitions=None</em>, <em>seed=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.poissonVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.poissonVectorRDD" title="Permalink to this definition">¶</a></dt> <dd><p>Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean.</p> <table class="docutils field-list" frame="void" rules="none"> @@ -5327,7 +5320,7 @@ to U(a, b), use <dl class="staticmethod"> <dt id="pyspark.mllib.random.RandomRDDs.uniformVectorRDD"> -<em class="property">static </em><code class="descname">uniformVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>*a</em>, <em>**kw</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.uniformVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.uniformVectorRDD" title="Permalink to this definition">¶</a></dt> +<em class="property">static </em><code class="descname">uniformVectorRDD</code><span class="sig-paren">(</span><em>sc</em>, <em>numRows</em>, <em>numCols</em>, <em>numPartitions=None</em>, <em>seed=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/pyspark/mllib/random.html#RandomRDDs.uniformVectorRDD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.random.RandomRDDs.uniformVectorRDD" title="Permalink to this definition">¶</a></dt> <dd><p>Generates an RDD comprised of vectors containing i.i.d. samples drawn from the uniform distribution U(0.0, 1.0).</p> <table class="docutils field-list" frame="void" rules="none"> @@ -6598,9 +6591,9 @@ of freedom, p-value, the method used, and the null hypothesis.</p> <span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">pearson</span><span class="o">.</span><span class="n">pValue</span><span class="p">,</span> <span class="mi">4</span><span class="p">))</span> <span class="go">0.8187</span> <span class="gp">>>> </span><span class="n">pearson</span><span class="o">.</span><span class="n">method</span> -<span class="go">u'pearson'</span> +<span class="go">'pearson'</span> <span class="gp">>>> </span><span class="n">pearson</span><span class="o">.</span><span class="n">nullHypothesis</span> -<span class="go">u'observed follows the same distribution as expected.'</span> +<span class="go">'observed follows the same distribution as expected.'</span> </pre></div> </div> <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">observed</span> <span class="o">=</span> <span class="n">Vectors</span><span class="o">.</span><span class="n">dense</span><span class="p">([</span><span class="mi">21</span><span class="p">,</span> <span class="mi">38</span><span class="p">,</span> <span class="mi">43</span><span class="p">,</span> <span class="mi">80</span><span class="p">])</span> @@ -6700,7 +6693,7 @@ Supported: <cite>pearson</cite> (default), <cite>spearman</cite></li> <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="o">-</span><span class="mf">2.0</span><span class="p">],</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">y</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="mf">4.0</span><span class="p">,</span> <span class="mf">5.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">],</span> <span class="mi">2</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">zeros</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">],</span> <span class="mi">2</span><span class="p">)</span> -<span class="gp">>>> </span><span class="nb">abs</span><span class="p">(</span><span class="n">Statistics</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="mf">0.6546537</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="n">e</span><span class="o">-</span><span class="mi">7</span> +<span class="gp">>>> </span><span class="nb">abs</span><span class="p">(</span><span class="n">Statistics</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="mf">0.6546537</span><span class="p">)</span> <span class="o"><</span> <span class="mf">1e-7</span> <span class="go">True</span> <span class="gp">>>> </span><span class="n">Statistics</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> <span class="o">==</span> <span class="n">Statistics</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="s2">"pearson"</span><span class="p">)</span> <span class="go">True</span> @@ -6780,7 +6773,7 @@ the method used, and the null hypothesis.</p> <span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">ksmodel</span><span class="o">.</span><span class="n">statistic</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span> <span class="go">0.175</span> <span class="gp">>>> </span><span class="n">ksmodel</span><span class="o">.</span><span class="n">nullHypothesis</span> -<span class="go">u'Sample follows theoretical distribution'</span> +<span class="go">'Sample follows theoretical distribution'</span> </pre></div> </div> <div class="highlight-default"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="mf">2.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">,</span> <span class="mf">4.0</span><span class="p">])</span> @@ -8013,7 +8006,7 @@ dimensions.</li> <span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.mllib.util</span> <span class="k">import</span> <span class="n">MLUtils</span> <span class="gp">>>> </span><span class="kn">from</span> <span class="nn">pyspark.mllib.regression</span> <span class="k">import</span> <span class="n">LabeledPoint</span> <span class="gp">>>> </span><span class="n">tempFile</span> <span class="o">=</span> <span class="n">NamedTemporaryFile</span><span class="p">(</span><span class="n">delete</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> -<span class="gp">>>> </span><span class="n">_</span> <span class="o">=</span> <span class="n">tempFile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">b</span><span class="s2">"+1 1:1.0 3:2.0 5:3.0</span><span class="se">\n</span><span class="s2">-1</span><span class="se">\n</span><span class="s2">-1 2:4.0 4:5.0 6:6.0"</span><span class="p">)</span> +<span class="gp">>>> </span><span class="n">_</span> <span class="o">=</span> <span class="n">tempFile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="sa">b</span><span class="s2">"+1 1:1.0 3:2.0 5:3.0</span><span class="se">\n</span><span class="s2">-1</span><span class="se">\n</span><span class="s2">-1 2:4.0 4:5.0 6:6.0"</span><span class="p">)</span> <span class="gp">>>> </span><span class="n">tempFile</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> <span class="gp">>>> </span><span class="n">examples</span> <span class="o">=</span> <span class="n">MLUtils</span><span class="o">.</span><span class="n">loadLibSVMFile</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">tempFile</span><span class="o">.</span><span class="n">name</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="gp">>>> </span><span class="n">tempFile</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org