[2/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/pyspark.streaming.html
--
diff --git a/site/docs/2.3.1/api/python/pyspark.streaming.html b/site/docs/2.3.1/api/python/pyspark.streaming.html
index 7f1dee5..411799a 100644
--- a/site/docs/2.3.1/api/python/pyspark.streaming.html
+++ b/site/docs/2.3.1/api/python/pyspark.streaming.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.streaming module  PySpark master documentation
+pyspark.streaming module  PySpark 2.3.1 documentation
 
 
 
 

[6/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/_modules/pyspark/profiler.html
--
diff --git a/site/docs/2.3.1/api/python/_modules/pyspark/profiler.html b/site/docs/2.3.1/api/python/_modules/pyspark/profiler.html
index b7ac6ff..84aa845 100644
--- a/site/docs/2.3.1/api/python/_modules/pyspark/profiler.html
+++ b/site/docs/2.3.1/api/python/_modules/pyspark/profiler.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.profiler  PySpark master documentation
+pyspark.profiler  PySpark 2.3.1 documentation
 
 
 
 

[5/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/pyspark.ml.html
--
diff --git a/site/docs/2.3.1/api/python/pyspark.ml.html b/site/docs/2.3.1/api/python/pyspark.ml.html
index 4ada723..986c949 100644
--- a/site/docs/2.3.1/api/python/pyspark.ml.html
+++ b/site/docs/2.3.1/api/python/pyspark.ml.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.ml package  PySpark master documentation
+pyspark.ml package  PySpark 2.3.1 documentation
 
 
 
 

[3/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/pyspark.sql.html
--
diff --git a/site/docs/2.3.1/api/python/pyspark.sql.html b/site/docs/2.3.1/api/python/pyspark.sql.html
index 43c51be..6716867 100644
--- a/site/docs/2.3.1/api/python/pyspark.sql.html
+++ b/site/docs/2.3.1/api/python/pyspark.sql.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.sql module  PySpark master documentation
+pyspark.sql module  PySpark 2.3.1 documentation
 
 
 
 

[4/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/pyspark.mllib.html
--
diff --git a/site/docs/2.3.1/api/python/pyspark.mllib.html b/site/docs/2.3.1/api/python/pyspark.mllib.html
index c449f16..662b562 100644
--- a/site/docs/2.3.1/api/python/pyspark.mllib.html
+++ b/site/docs/2.3.1/api/python/pyspark.mllib.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.mllib package  PySpark master documentation
+pyspark.mllib package  PySpark 2.3.1 documentation
 
 
 
 

[1/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.3.1

2018-07-03 Thread gurwls223
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 26b527127 -> 5660fb9a4


http://git-wip-us.apache.org/repos/asf/spark-website/blob/5660fb9a/site/docs/2.3.1/api/python/searchindex.js
--
diff --git a/site/docs/2.3.1/api/python/searchindex.js b/site/docs/2.3.1/api/python/searchindex.js
index 0a5ec65..b5c8344 100644
--- a/site/docs/2.3.1/api/python/searchindex.js
+++ b/site/docs/2.3.1/api/python/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["index","pyspark","pyspark.ml","pyspark.mllib","pyspark.sql","pyspark.streaming"],envversion:52,filenames:["index.rst","pyspark.rst","pyspark.ml.rst","pyspark.mllib.rst","pyspark.sql.rst","pyspark.streaming.rst"],objects:{"":{pyspark:[1,0,0,"-"]},"pyspark.Accumulator":{add:[1,2,1,""],value:[1,3,1,""]},"pyspark.AccumulatorParam":{addInPlace:[1,2,1,""],zero:[1,2,1,""]},"pyspark.BasicProfiler":{profile:[1,2,1,""],stats:[1,2,1,""]},"pyspark.Broadcast":{destroy:[1,2,1,""],dump:[1,2,1,""],load:[1,2,1,""],unpersist:[1,2,1,""],value:[1,3,1,""]},"pyspark.MarshalSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.PickleSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.Profiler":{dump:[1,2,1,""],profile:[1,2,1,""],show:[1,2,1,""],stats:[1,2,1,""]},"pyspark.RDD":{aggregate:[1,2,1,""],aggregateByKey:[1,2,1,""],cache:[1,2,1,""],cartesian:[1,2,1,""],checkpoint:[1,2,1,""],coalesce:[1,2,1,""],cogroup:[1,2,1,""],collect:[1,2,1,""],collectAsMap:[1,2,1,""],combine
 
ByKey:[1,2,1,""],context:[1,3,1,""],count:[1,2,1,""],countApprox:[1,2,1,""],countApproxDistinct:[1,2,1,""],countByKey:[1,2,1,""],countByValue:[1,2,1,""],distinct:[1,2,1,""],filter:[1,2,1,""],first:[1,2,1,""],flatMap:[1,2,1,""],flatMapValues:[1,2,1,""],fold:[1,2,1,""],foldByKey:[1,2,1,""],foreach:[1,2,1,""],foreachPartition:[1,2,1,""],fullOuterJoin:[1,2,1,""],getCheckpointFile:[1,2,1,""],getNumPartitions:[1,2,1,""],getStorageLevel:[1,2,1,""],glom:[1,2,1,""],groupBy:[1,2,1,""],groupByKey:[1,2,1,""],groupWith:[1,2,1,""],histogram:[1,2,1,""],id:[1,2,1,""],intersection:[1,2,1,""],isCheckpointed:[1,2,1,""],isEmpty:[1,2,1,""],isLocallyCheckpointed:[1,2,1,""],join:[1,2,1,""],keyBy:[1,2,1,""],keys:[1,2,1,""],leftOuterJoin:[1,2,1,""],localCheckpoint:[1,2,1,""],lookup:[1,2,1,""],map:[1,2,1,""],mapPartitions:[1,2,1,""],mapPartitionsWithIndex:[1,2,1,""],mapPartitionsWithSplit:[1,2,1,""],mapValues:[1,2,1,""],max:[1,2,1,""],mean:[1,2,1,""],meanApprox:[1,2,1,""],min:[1,2,1,""],name:[1,2,1,""],parti
 
tionBy:[1,2,1,""],persist:[1,2,1,""],pipe:[1,2,1,""],randomSplit:[1,2,1,""],reduce:[1,2,1,""],reduceByKey:[1,2,1,""],reduceByKeyLocally:[1,2,1,""],repartition:[1,2,1,""],repartitionAndSortWithinPartitions:[1,2,1,""],rightOuterJoin:[1,2,1,""],sample:[1,2,1,""],sampleByKey:[1,2,1,""],sampleStdev:[1,2,1,""],sampleVariance:[1,2,1,""],saveAsHadoopDataset:[1,2,1,""],saveAsHadoopFile:[1,2,1,""],saveAsNewAPIHadoopDataset:[1,2,1,""],saveAsNewAPIHadoopFile:[1,2,1,""],saveAsPickleFile:[1,2,1,""],saveAsSequenceFile:[1,2,1,""],saveAsTextFile:[1,2,1,""],setName:[1,2,1,""],sortBy:[1,2,1,""],sortByKey:[1,2,1,""],stats:[1,2,1,""],stdev:[1,2,1,""],subtract:[1,2,1,""],subtractByKey:[1,2,1,""],sum:[1,2,1,""],sumApprox:[1,2,1,""],take:[1,2,1,""],takeOrdered:[1,2,1,""],takeSample:[1,2,1,""],toDebugString:[1,2,1,""],toLocalIterator:[1,2,1,""],top:[1,2,1,""],treeAggregate:[1,2,1,""],treeReduce:[1,2,1,""],union:[1,2,1,""],unpersist:[1,2,1,""],values:[1,2,1,""],variance:[1,2,1,""],zip:[1,2,1,""],zipWithIndex
 
:[1,2,1,""],zipWithUniqueId:[1,2,1,""]},"pyspark.SparkConf":{contains:[1,2,1,""],get:[1,2,1,""],getAll:[1,2,1,""],set:[1,2,1,""],setAll:[1,2,1,""],setAppName:[1,2,1,""],setExecutorEnv:[1,2,1,""],setIfMissing:[1,2,1,""],setMaster:[1,2,1,""],setSparkHome:[1,2,1,""],toDebugString:[1,2,1,""]},"pyspark.SparkContext":{PACKAGE_EXTENSIONS:[1,3,1,""],accumulator:[1,2,1,""],addFile:[1,2,1,""],addPyFile:[1,2,1,""],applicationId:[1,3,1,""],binaryFiles:[1,2,1,""],binaryRecords:[1,2,1,""],broadcast:[1,2,1,""],cancelAllJobs:[1,2,1,""],cancelJobGroup:[1,2,1,""],defaultMinPartitions:[1,3,1,""],defaultParallelism:[1,3,1,""],dump_profiles:[1,2,1,""],emptyRDD:[1,2,1,""],getConf:[1,2,1,""],getLocalProperty:[1,2,1,""],getOrCreate:[1,4,1,""],hadoopFile:[1,2,1,""],hadoopRDD:[1,2,1,""],newAPIHadoopFile:[1,2,1,""],newAPIHadoopRDD:[1,2,1,""],parallelize:[1,2,1,""],pickleFile:[1,2,1,""],range:[1,2,1,""],runJob:[1,2,1,""],sequenceFile:[1,2,1,""],setCheckpointDir:[1,2,1,""],setJobDescription:[1,2,1,""],setJobGro
 

[7/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
Fix signature description broken in PySpark API documentation in 2.2.1


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/26b52712
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/26b52712
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/26b52712

Branch: refs/heads/asf-site
Commit: 26b5271279a72e7d78948abf96f69ea3a99db209
Parents: 8857572
Author: hyukjinkwon 
Authored: Tue Jul 3 01:53:07 2018 +0800
Committer: hyukjinkwon 
Committed: Wed Jul 4 12:40:02 2018 +0800

--
 site/docs/2.2.1/api/python/_modules/index.html  |   8 +-
 .../python/_modules/pyspark/accumulators.html   |   8 +-
 .../api/python/_modules/pyspark/broadcast.html  |   8 +-
 .../2.2.1/api/python/_modules/pyspark/conf.html |   8 +-
 .../api/python/_modules/pyspark/context.html|   8 +-
 .../api/python/_modules/pyspark/files.html  |   8 +-
 .../api/python/_modules/pyspark/ml/base.html|   8 +-
 .../_modules/pyspark/ml/classification.html |   8 +-
 .../python/_modules/pyspark/ml/clustering.html  |   8 +-
 .../python/_modules/pyspark/ml/evaluation.html  |   8 +-
 .../api/python/_modules/pyspark/ml/feature.html |   8 +-
 .../api/python/_modules/pyspark/ml/fpm.html |   8 +-
 .../api/python/_modules/pyspark/ml/linalg.html  |   8 +-
 .../api/python/_modules/pyspark/ml/param.html   |   8 +-
 .../_modules/pyspark/ml/param/shared.html   |   8 +-
 .../python/_modules/pyspark/ml/pipeline.html|   8 +-
 .../_modules/pyspark/ml/recommendation.html |   8 +-
 .../python/_modules/pyspark/ml/regression.html  |   8 +-
 .../api/python/_modules/pyspark/ml/stat.html|   8 +-
 .../api/python/_modules/pyspark/ml/tuning.html  |   8 +-
 .../api/python/_modules/pyspark/ml/util.html|   8 +-
 .../api/python/_modules/pyspark/ml/wrapper.html |   8 +-
 .../_modules/pyspark/mllib/classification.html  |   8 +-
 .../_modules/pyspark/mllib/clustering.html  |   8 +-
 .../python/_modules/pyspark/mllib/common.html   |   8 +-
 .../_modules/pyspark/mllib/evaluation.html  |   8 +-
 .../python/_modules/pyspark/mllib/feature.html  |   8 +-
 .../api/python/_modules/pyspark/mllib/fpm.html  |   8 +-
 .../python/_modules/pyspark/mllib/linalg.html   |   8 +-
 .../pyspark/mllib/linalg/distributed.html   |   8 +-
 .../python/_modules/pyspark/mllib/random.html   |   8 +-
 .../_modules/pyspark/mllib/recommendation.html  |   8 +-
 .../_modules/pyspark/mllib/regression.html  |   8 +-
 .../pyspark/mllib/stat/KernelDensity.html   |   8 +-
 .../pyspark/mllib/stat/distribution.html|   8 +-
 .../_modules/pyspark/mllib/stat/test.html   |   8 +-
 .../api/python/_modules/pyspark/mllib/tree.html |   8 +-
 .../api/python/_modules/pyspark/mllib/util.html |   8 +-
 .../api/python/_modules/pyspark/profiler.html   |   8 +-
 .../2.2.1/api/python/_modules/pyspark/rdd.html  |   8 +-
 .../python/_modules/pyspark/serializers.html|   8 +-
 .../api/python/_modules/pyspark/sql/column.html |   8 +-
 .../python/_modules/pyspark/sql/context.html|   8 +-
 .../python/_modules/pyspark/sql/dataframe.html  |   8 +-
 .../python/_modules/pyspark/sql/functions.html  |   8 +-
 .../api/python/_modules/pyspark/sql/group.html  |   8 +-
 .../python/_modules/pyspark/sql/readwriter.html |   8 +-
 .../python/_modules/pyspark/sql/session.html|   8 +-
 .../python/_modules/pyspark/sql/streaming.html  |   8 +-
 .../api/python/_modules/pyspark/sql/types.html  |   8 +-
 .../api/python/_modules/pyspark/sql/window.html |   8 +-
 .../api/python/_modules/pyspark/status.html |   8 +-
 .../python/_modules/pyspark/storagelevel.html   |   8 +-
 .../_modules/pyspark/streaming/context.html |   8 +-
 .../_modules/pyspark/streaming/dstream.html |   8 +-
 .../_modules/pyspark/streaming/flume.html   |   8 +-
 .../_modules/pyspark/streaming/kafka.html   |   8 +-
 .../_modules/pyspark/streaming/kinesis.html |   8 +-
 .../_modules/pyspark/streaming/listener.html|   8 +-
 .../python/_modules/pyspark/taskcontext.html|   8 +-
 site/docs/2.2.1/api/python/index.html   |   8 +-
 site/docs/2.2.1/api/python/pyspark.html |  30 +-
 site/docs/2.2.1/api/python/pyspark.ml.html  | 164 +--
 site/docs/2.2.1/api/python/pyspark.mllib.html   |  36 +--
 site/docs/2.2.1/api/python/pyspark.sql.html | 272 +--
 .../2.2.1/api/python/pyspark.streaming.html |  11 +-
 site/docs/2.2.1/api/python/search.html  |   8 +-
 site/docs/2.2.1/api/python/searchindex.js   |   2 +-
 68 files changed, 506 insertions(+), 505 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/_modules/index.html
--
diff --git 

[5/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/pyspark.ml.html
--
diff --git a/site/docs/2.2.1/api/python/pyspark.ml.html b/site/docs/2.2.1/api/python/pyspark.ml.html
index 1398703..a5757cd 100644
--- a/site/docs/2.2.1/api/python/pyspark.ml.html
+++ b/site/docs/2.2.1/api/python/pyspark.ml.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.ml package  PySpark  documentation
+pyspark.ml package  PySpark 2.2.1 documentation
 
 
 
 

[2/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/pyspark.streaming.html
--
diff --git a/site/docs/2.2.1/api/python/pyspark.streaming.html b/site/docs/2.2.1/api/python/pyspark.streaming.html
index 6254899..f5543b5 100644
--- a/site/docs/2.2.1/api/python/pyspark.streaming.html
+++ b/site/docs/2.2.1/api/python/pyspark.streaming.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.streaming module  PySpark  documentation
+pyspark.streaming module  PySpark 2.2.1 documentation
 
 
 
 

[6/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/_modules/pyspark/rdd.html
--
diff --git a/site/docs/2.2.1/api/python/_modules/pyspark/rdd.html b/site/docs/2.2.1/api/python/_modules/pyspark/rdd.html
index ee22d01..17adf92 100644
--- a/site/docs/2.2.1/api/python/_modules/pyspark/rdd.html
+++ b/site/docs/2.2.1/api/python/_modules/pyspark/rdd.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.rdd  PySpark  documentation
+pyspark.rdd  PySpark 2.2.1 documentation
 
 
 
 

[4/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/pyspark.mllib.html
--
diff --git a/site/docs/2.2.1/api/python/pyspark.mllib.html b/site/docs/2.2.1/api/python/pyspark.mllib.html
index cd27d38..baf0804 100644
--- a/site/docs/2.2.1/api/python/pyspark.mllib.html
+++ b/site/docs/2.2.1/api/python/pyspark.mllib.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.mllib package  PySpark  documentation
+pyspark.mllib package  PySpark 2.2.1 documentation
 
 
 
 

[3/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/pyspark.sql.html
--
diff --git a/site/docs/2.2.1/api/python/pyspark.sql.html b/site/docs/2.2.1/api/python/pyspark.sql.html
index 8b349cc..2174c25 100644
--- a/site/docs/2.2.1/api/python/pyspark.sql.html
+++ b/site/docs/2.2.1/api/python/pyspark.sql.html
@@ -5,14 +5,14 @@
 http://www.w3.org/1999/xhtml;>
   
 
-pyspark.sql module  PySpark  documentation
+pyspark.sql module  PySpark 2.2.1 documentation
 
 
 
 

[1/7] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.2.1

2018-07-03 Thread gurwls223
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 8857572df -> 26b527127


http://git-wip-us.apache.org/repos/asf/spark-website/blob/26b52712/site/docs/2.2.1/api/python/searchindex.js
--
diff --git a/site/docs/2.2.1/api/python/searchindex.js b/site/docs/2.2.1/api/python/searchindex.js
index b40aeb8..345db45 100644
--- a/site/docs/2.2.1/api/python/searchindex.js
+++ b/site/docs/2.2.1/api/python/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["index","pyspark","pyspark.ml","pyspark.mllib","pyspark.sql","pyspark.streaming"],envversion:52,filenames:["index.rst","pyspark.rst","pyspark.ml.rst","pyspark.mllib.rst","pyspark.sql.rst","pyspark.streaming.rst"],objects:{"":{pyspark:[1,0,0,"-"]},"pyspark.Accumulator":{add:[1,2,1,""],value:[1,3,1,""]},"pyspark.AccumulatorParam":{addInPlace:[1,2,1,""],zero:[1,2,1,""]},"pyspark.BasicProfiler":{profile:[1,2,1,""],stats:[1,2,1,""]},"pyspark.Broadcast":{destroy:[1,2,1,""],dump:[1,2,1,""],load:[1,2,1,""],unpersist:[1,2,1,""],value:[1,3,1,""]},"pyspark.MarshalSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.PickleSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.Profiler":{dump:[1,2,1,""],profile:[1,2,1,""],show:[1,2,1,""],stats:[1,2,1,""]},"pyspark.RDD":{aggregate:[1,2,1,""],aggregateByKey:[1,2,1,""],cache:[1,2,1,""],cartesian:[1,2,1,""],checkpoint:[1,2,1,""],coalesce:[1,2,1,""],cogroup:[1,2,1,""],collect:[1,2,1,""],collectAsMap:[1,2,1,""],combine
 
ByKey:[1,2,1,""],context:[1,3,1,""],count:[1,2,1,""],countApprox:[1,2,1,""],countApproxDistinct:[1,2,1,""],countByKey:[1,2,1,""],countByValue:[1,2,1,""],distinct:[1,2,1,""],filter:[1,2,1,""],first:[1,2,1,""],flatMap:[1,2,1,""],flatMapValues:[1,2,1,""],fold:[1,2,1,""],foldByKey:[1,2,1,""],foreach:[1,2,1,""],foreachPartition:[1,2,1,""],fullOuterJoin:[1,2,1,""],getCheckpointFile:[1,2,1,""],getNumPartitions:[1,2,1,""],getStorageLevel:[1,2,1,""],glom:[1,2,1,""],groupBy:[1,2,1,""],groupByKey:[1,2,1,""],groupWith:[1,2,1,""],histogram:[1,2,1,""],id:[1,2,1,""],intersection:[1,2,1,""],isCheckpointed:[1,2,1,""],isEmpty:[1,2,1,""],isLocallyCheckpointed:[1,2,1,""],join:[1,2,1,""],keyBy:[1,2,1,""],keys:[1,2,1,""],leftOuterJoin:[1,2,1,""],localCheckpoint:[1,2,1,""],lookup:[1,2,1,""],map:[1,2,1,""],mapPartitions:[1,2,1,""],mapPartitionsWithIndex:[1,2,1,""],mapPartitionsWithSplit:[1,2,1,""],mapValues:[1,2,1,""],max:[1,2,1,""],mean:[1,2,1,""],meanApprox:[1,2,1,""],min:[1,2,1,""],name:[1,2,1,""],parti
 
tionBy:[1,2,1,""],persist:[1,2,1,""],pipe:[1,2,1,""],randomSplit:[1,2,1,""],reduce:[1,2,1,""],reduceByKey:[1,2,1,""],reduceByKeyLocally:[1,2,1,""],repartition:[1,2,1,""],repartitionAndSortWithinPartitions:[1,2,1,""],rightOuterJoin:[1,2,1,""],sample:[1,2,1,""],sampleByKey:[1,2,1,""],sampleStdev:[1,2,1,""],sampleVariance:[1,2,1,""],saveAsHadoopDataset:[1,2,1,""],saveAsHadoopFile:[1,2,1,""],saveAsNewAPIHadoopDataset:[1,2,1,""],saveAsNewAPIHadoopFile:[1,2,1,""],saveAsPickleFile:[1,2,1,""],saveAsSequenceFile:[1,2,1,""],saveAsTextFile:[1,2,1,""],setName:[1,2,1,""],sortBy:[1,2,1,""],sortByKey:[1,2,1,""],stats:[1,2,1,""],stdev:[1,2,1,""],subtract:[1,2,1,""],subtractByKey:[1,2,1,""],sum:[1,2,1,""],sumApprox:[1,2,1,""],take:[1,2,1,""],takeOrdered:[1,2,1,""],takeSample:[1,2,1,""],toDebugString:[1,2,1,""],toLocalIterator:[1,2,1,""],top:[1,2,1,""],treeAggregate:[1,2,1,""],treeReduce:[1,2,1,""],union:[1,2,1,""],unpersist:[1,2,1,""],values:[1,2,1,""],variance:[1,2,1,""],zip:[1,2,1,""],zipWithIndex
 
:[1,2,1,""],zipWithUniqueId:[1,2,1,""]},"pyspark.SparkConf":{contains:[1,2,1,""],get:[1,2,1,""],getAll:[1,2,1,""],set:[1,2,1,""],setAll:[1,2,1,""],setAppName:[1,2,1,""],setExecutorEnv:[1,2,1,""],setIfMissing:[1,2,1,""],setMaster:[1,2,1,""],setSparkHome:[1,2,1,""],toDebugString:[1,2,1,""]},"pyspark.SparkContext":{PACKAGE_EXTENSIONS:[1,3,1,""],accumulator:[1,2,1,""],addFile:[1,2,1,""],addPyFile:[1,2,1,""],applicationId:[1,3,1,""],binaryFiles:[1,2,1,""],binaryRecords:[1,2,1,""],broadcast:[1,2,1,""],cancelAllJobs:[1,2,1,""],cancelJobGroup:[1,2,1,""],defaultMinPartitions:[1,3,1,""],defaultParallelism:[1,3,1,""],dump_profiles:[1,2,1,""],emptyRDD:[1,2,1,""],getConf:[1,2,1,""],getLocalProperty:[1,2,1,""],getOrCreate:[1,4,1,""],hadoopFile:[1,2,1,""],hadoopRDD:[1,2,1,""],newAPIHadoopFile:[1,2,1,""],newAPIHadoopRDD:[1,2,1,""],parallelize:[1,2,1,""],pickleFile:[1,2,1,""],range:[1,2,1,""],runJob:[1,2,1,""],sequenceFile:[1,2,1,""],setCheckpointDir:[1,2,1,""],setJobGroup:[1,2,1,""],setLocalPropert
 

spark git commit: [SPARK-24732][SQL] Type coercion between MapTypes.

2018-07-03 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master 5bf95f2a3 -> 7c08eb6d6


[SPARK-24732][SQL] Type coercion between MapTypes.

## What changes were proposed in this pull request?

Currently we don't allow type coercion between maps. We can support type coercion between MapTypes where both the key types and the value types are compatible.
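
A minimal PySpark sketch (not part of the patch) of the kind of query this unlocks, assuming a SparkSession named `spark` on a build that includes this change and that the widening is reached through set-operation coercion, as it already is for arrays:

```python
# Hypothetical illustration of MapType coercion; column contents are made up.
df1 = spark.sql("SELECT map(1, 1)     AS m")   # m: map<int,int>
df2 = spark.sql("SELECT map(2L, 2.5D) AS m")   # m: map<bigint,double>

# Before this change the union fails to resolve because the map types differ;
# with key/value widening, both sides should coerce to map<bigint,double>.
df1.union(df2).printSchema()
```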

## How was this patch tested?

Added tests.

Author: Takuya UESHIN 

Closes #21703 from ueshin/issues/SPARK-24732/maptypecoercion.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c08eb6d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c08eb6d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c08eb6d

Branch: refs/heads/master
Commit: 7c08eb6d61d55ce45229f3302e6d463e7669183d
Parents: 5bf95f2
Author: Takuya UESHIN 
Authored: Wed Jul 4 12:21:26 2018 +0800
Committer: hyukjinkwon 
Committed: Wed Jul 4 12:21:26 2018 +0800

--
 .../sql/catalyst/analysis/TypeCoercion.scala| 12 ++
 .../catalyst/analysis/TypeCoercionSuite.scala   | 45 +++-
 2 files changed, 56 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7c08eb6d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 3ebab43..cf90e6e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -179,6 +179,12 @@ object TypeCoercion {
   .orElse((t1, t2) match {
 case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) =>
   findWiderTypeForTwo(et1, et2).map(ArrayType(_, containsNull1 || containsNull2))
+case (MapType(kt1, vt1, valueContainsNull1), MapType(kt2, vt2, valueContainsNull2)) =>
+  findWiderTypeForTwo(kt1, kt2).flatMap { kt =>
+findWiderTypeForTwo(vt1, vt2).map { vt =>
+  MapType(kt, vt, valueContainsNull1 || valueContainsNull2)
+}
+  }
 case _ => None
   })
   }
@@ -220,6 +226,12 @@ object TypeCoercion {
 case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) =>
   findWiderTypeWithoutStringPromotionForTwo(et1, et2)
 .map(ArrayType(_, containsNull1 || containsNull2))
+case (MapType(kt1, vt1, valueContainsNull1), MapType(kt2, vt2, valueContainsNull2)) =>
+  findWiderTypeWithoutStringPromotionForTwo(kt1, kt2).flatMap { kt =>
+findWiderTypeWithoutStringPromotionForTwo(vt1, vt2).map { vt =>
+  MapType(kt, vt, valueContainsNull1 || valueContainsNull2)
+}
+  }
 case _ => None
   })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c08eb6d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 0acd3b4..4e5ca1b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -54,8 +54,9 @@ class TypeCoercionSuite extends AnalysisTest {
   // | NullType | ByteType | ShortType | IntegerType | LongType | 
DoubleType | FloatType | Dec(10, 2) | BinaryType | BooleanType | StringType | 
DateType | TimestampType | ArrayType  | MapType  | StructType  | NullType | 
CalendarIntervalType | DecimalType(38, 18) | DoubleType  | IntegerType  |
   // | CalendarIntervalType | X| X | X   | X| 
X  | X | X  | X  | X   | X  | X 
   | X | X  | X| X   | X| 
CalendarIntervalType | X   | X   | X|
   // 
+--+--+---+-+--++---+++-++--+---++--+-+--+--+-+-+--+
-  // Note: MapType*, StructType* are castable only when the internal child types also match; otherwise, not castable.
+  // Note: StructType* is castable only when the internal child types also match;

svn commit: r27912 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_03_20_01-5bf95f2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-03 Thread pwendell
Author: pwendell
Date: Wed Jul  4 03:17:15 2018
New Revision: 27912

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_03_20_01-5bf95f2 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




spark git commit: [BUILD] Close stale PRs

2018-07-03 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master b42fda8ab -> 5bf95f2a3


[BUILD] Close stale PRs

Closes #20932
Closes #17843
Closes #13477
Closes #14291
Closes #20919
Closes #17907
Closes #18766
Closes #20809
Closes #8849
Closes #21076
Closes #21507
Closes #21336
Closes #21681
Closes #21691

Author: Sean Owen 

Closes #21708 from srowen/CloseStalePRs.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5bf95f2a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5bf95f2a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5bf95f2a

Branch: refs/heads/master
Commit: 5bf95f2a37e624eb6fb0ef6fbd2a40a129d5a470
Parents: b42fda8
Author: Sean Owen 
Authored: Wed Jul 4 09:53:04 2018 +0800
Committer: hyukjinkwon 
Committed: Wed Jul 4 09:53:04 2018 +0800

--

--






spark git commit: [SPARK-23698] Remove raw_input() from Python 2

2018-07-03 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master 776f299fc -> b42fda8ab


[SPARK-23698] Remove raw_input() from Python 2

Signed-off-by: cclauss 

## What changes were proposed in this pull request?

Humans will be able to enter text in Python 3 prompts, which they cannot do today.
The Python builtin __raw_input()__ was removed in Python 3 in favor of __input()__. This PR does the same thing in Python 2.
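
A minimal, self-contained sketch of the compatibility pattern the patch applies (the alias only takes effect on Python 2, where `raw_input()` reads a line as a string; on Python 3 the builtin `input()` already does this):

```python
import sys

# On Python 2, input() evaluates what the user types, while raw_input() returns it
# as a plain string; aliasing lets the rest of the script use one name on both versions.
if sys.version < '3':
    input = raw_input  # noqa: F821 -- raw_input only exists on Python 2

answer = input("Proceed? [y/n]: ")
if answer.lower() != "y":
    sys.exit(1)
```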

## How was this patch tested?

flake8 testing

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: cclauss 

Closes #21702 from cclauss/python-fix-raw_input.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b42fda8a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b42fda8a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b42fda8a

Branch: refs/heads/master
Commit: b42fda8ab3b5f82b33b96fce3f584c50f2ed5a3a
Parents: 776f299
Author: cclauss 
Authored: Wed Jul 4 09:40:58 2018 +0800
Committer: hyukjinkwon 
Committed: Wed Jul 4 09:40:58 2018 +0800

--
 dev/create-release/releaseutils.py |  5 -
 dev/merge_spark_pr.py  | 21 -
 2 files changed, 16 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b42fda8a/dev/create-release/releaseutils.py
--
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index 32f6cbb..ab812e1 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -49,13 +49,16 @@ except ImportError:
 print("Install using 'sudo pip install unidecode'")
 sys.exit(-1)
 
+if sys.version < '3':
+input = raw_input
+
 # Contributors list file name
 contributors_file_name = "contributors.txt"
 
 
 # Prompt the user to answer yes or no until they do so
 def yesOrNoPrompt(msg):
-response = raw_input("%s [y/n]: " % msg)
+response = input("%s [y/n]: " % msg)
 while response != "y" and response != "n":
 return yesOrNoPrompt(msg)
 return response == "y"

http://git-wip-us.apache.org/repos/asf/spark/blob/b42fda8a/dev/merge_spark_pr.py
--
diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 7f46a1c..79c7c02 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -39,6 +39,9 @@ try:
 except ImportError:
 JIRA_IMPORTED = False
 
+if sys.version < '3':
+input = raw_input
+
 # Location of your Spark git development area
 SPARK_HOME = os.environ.get("SPARK_HOME", os.getcwd())
 # Remote name which points to the Gihub site
@@ -95,7 +98,7 @@ def run_cmd(cmd):
 
 
 def continue_maybe(prompt):
-result = raw_input("\n%s (y/n): " % prompt)
+result = input("\n%s (y/n): " % prompt)
 if result.lower() != "y":
 fail("Okay, exiting")
 
@@ -134,7 +137,7 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
  '--pretty=format:%an <%ae>']).split("\n")
 distinct_authors = sorted(set(commit_authors),
   key=lambda x: commit_authors.count(x), 
reverse=True)
-primary_author = raw_input(
+primary_author = input(
 "Enter primary author in the format of \"name \" [%s]: " %
 distinct_authors[0])
 if primary_author == "":
@@ -184,7 +187,7 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
 
 
 def cherry_pick(pr_num, merge_hash, default_branch):
-pick_ref = raw_input("Enter a branch name [%s]: " % default_branch)
+pick_ref = input("Enter a branch name [%s]: " % default_branch)
 if pick_ref == "":
 pick_ref = default_branch
 
@@ -231,7 +234,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
 asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
 basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
 
-jira_id = raw_input("Enter a JIRA id [%s]: " % default_jira_id)
+jira_id = input("Enter a JIRA id [%s]: " % default_jira_id)
 if jira_id == "":
 jira_id = default_jira_id
 
@@ -276,7 +279,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
  default_fix_versions = filter(lambda x: x != v, default_fix_versions)
  default_fix_versions = ",".join(default_fix_versions)
 
-fix_versions = raw_input("Enter comma-separated fix version(s) [%s]: " % default_fix_versions)
+fix_versions = input("Enter comma-separated fix version(s) [%s]: " % default_fix_versions)
 if fix_versions == "":
 

spark git commit: [SPARK-24709][SQL] schema_of_json() - schema inference from an example

2018-07-03 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master 5585c5765 -> 776f299fc


[SPARK-24709][SQL] schema_of_json() - schema inference from an example

## What changes were proposed in this pull request?

In the PR, I propose to add a new function, *schema_of_json()*, which infers the schema of a JSON string literal. The result of the function is a string containing a schema in DDL format.

One of the use cases is combining *schema_of_json()* with *from_json()*. Currently, _from_json()_ requires a schema as a mandatory argument. The *schema_of_json()* function allows pointing to a JSON string as an example that has the same schema as the first argument of _from_json()_. For instance:

```sql
select from_json(json_column, schema_of_json('{"c1": [0], "c2": [{"c3":0}]}'))
from json_table;
```
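
A minimal PySpark sketch of the same pattern, mirroring the doctests added to `python/pyspark/sql/functions.py` below (assumes an active SparkSession named `spark`; the data and column names are illustrative):

```python
from pyspark.sql.functions import from_json, schema_of_json, lit

# Infer a DDL schema string from a literal JSON example, then reuse it to parse a column.
data = [(1, '{"a": 1}')]
df = spark.createDataFrame(data, ("key", "value"))

inferred = schema_of_json(lit('{"a": 0}'))   # a Column producing a DDL string such as 'struct<a:bigint>'
df.select(from_json(df.value, inferred).alias("json")).collect()
# [Row(json=Row(a=1))]
```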

## How was this patch tested?

Added new test to `JsonFunctionsSuite`, `JsonExpressionsSuite` and SQL tests to 
`json-functions.sql`

Author: Maxim Gekk 

Closes #21686 from MaxGekk/infer_schema_json.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/776f299f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/776f299f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/776f299f

Branch: refs/heads/master
Commit: 776f299fc8146b400e97185b1577b0fc8f06e14b
Parents: 5585c57
Author: Maxim Gekk 
Authored: Wed Jul 4 09:38:18 2018 +0800
Committer: hyukjinkwon 
Committed: Wed Jul 4 09:38:18 2018 +0800

--
 python/pyspark/sql/functions.py |  27 ++
 .../catalyst/analysis/FunctionRegistry.scala|   1 +
 .../catalyst/expressions/jsonExpressions.scala  |  52 ++-
 .../sql/catalyst/json/JsonInferSchema.scala | 348 ++
 .../expressions/JsonExpressionsSuite.scala  |   7 +
 .../datasources/json/JsonDataSource.scala   |   2 +-
 .../datasources/json/JsonInferSchema.scala  | 349 ---
 .../scala/org/apache/spark/sql/functions.scala  |  42 +++
 .../sql-tests/inputs/json-functions.sql |   4 +
 .../sql-tests/results/json-functions.sql.out|  20 +-
 .../apache/spark/sql/JsonFunctionsSuite.scala   |  17 +-
 .../execution/datasources/json/JsonSuite.scala  |   4 +-
 12 files changed, 509 insertions(+), 364 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/776f299f/python/pyspark/sql/functions.py
--
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 9652d3e..4d37197 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2189,11 +2189,16 @@ def from_json(col, schema, options={}):
 >>> df = spark.createDataFrame(data, ("key", "value"))
 >>> df.select(from_json(df.value, schema).alias("json")).collect()
 [Row(json=[Row(a=1)])]
+>>> schema = schema_of_json(lit('''{"a": 0}'''))
+>>> df.select(from_json(df.value, schema).alias("json")).collect()
+[Row(json=Row(a=1))]
 """
 
 sc = SparkContext._active_spark_context
 if isinstance(schema, DataType):
 schema = schema.json()
+elif isinstance(schema, Column):
+schema = _to_java_column(schema)
 jc = sc._jvm.functions.from_json(_to_java_column(col), schema, options)
 return Column(jc)
 
@@ -2235,6 +2240,28 @@ def to_json(col, options={}):
 return Column(jc)
 
 
+@ignore_unicode_prefix
+@since(2.4)
+def schema_of_json(col):
+"""
+Parses a column containing a JSON string and infers its schema in DDL format.
+
+:param col: string column in json format
+
+>>> from pyspark.sql.types import *
+>>> data = [(1, '{"a": 1}')]
+>>> df = spark.createDataFrame(data, ("key", "value"))
+>>> df.select(schema_of_json(df.value).alias("json")).collect()
+[Row(json=u'struct')]
+>>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect()
+[Row(json=u'struct')]
+"""
+
+sc = SparkContext._active_spark_context
+jc = sc._jvm.functions.schema_of_json(_to_java_column(col))
+return Column(jc)
+
+
 @since(1.5)
 def size(col):
 """

http://git-wip-us.apache.org/repos/asf/spark/blob/776f299f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index a574d8a..80a0af6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -505,6 +505,7 @@ object FunctionRegistry {
 // json
 

svn commit: r27905 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_03_12_01-5585c57-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-03 Thread pwendell
Author: pwendell
Date: Tue Jul  3 19:16:29 2018
New Revision: 27905

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_03_12_01-5585c57 docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]




[spark] Git Push Summary

2018-07-03 Thread tgraves
Repository: spark
Updated Tags:  refs/tags/v2.2.2 [created] fc28ba3db




[5/6] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.3

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/da71a5c1/site/docs/2.1.3/api/python/pyspark.ml.html
--
diff --git a/site/docs/2.1.3/api/python/pyspark.ml.html b/site/docs/2.1.3/api/python/pyspark.ml.html
index f37f2df..206a8b1 100644
--- a/site/docs/2.1.3/api/python/pyspark.ml.html
+++ b/site/docs/2.1.3/api/python/pyspark.ml.html
@@ -555,7 +555,7 @@ uses 
 
-class pyspark.ml.Pipeline(*args, 
**kwargs)[source]¶
+class pyspark.ml.Pipeline(stages=None)[source]¶
 A simple pipeline, which acts as an estimator. A Pipeline consists
 of a sequence of stages, each of which is either an
 Estimator or a Transformer. When
@@ -1238,7 +1238,7 @@ uses pyspark.ml.feature module¶
 
 
-class pyspark.ml.feature.Binarizer(*args, **kwargs)[source]¶
+class pyspark.ml.feature.Binarizer(threshold=0.0, inputCol=None, 
outputCol=None)[source]¶
 Binarize a column of continuous features given a threshold.
  df = spark.createDataFrame([(0.5,)], 
[values])
  binarizer = Binarizer(threshold=1.0, inputCol=values, outputCol=features)
@@ -1508,7 +1508,7 @@ uses 
 
-class pyspark.ml.feature.Bucketizer(*args, **kwargs)[source]¶
+class pyspark.ml.feature.Bucketizer(splits=None, inputCol=None, 
outputCol=None, handleInvalid='error')[source]¶
 Maps a column of continuous features to a column of feature buckets.
  values = [(0.1,), (0.4,), (1.2,), (1.5,), (float(nan),), (float(nan),)]
  df = spark.createDataFrame(values, [values])
@@ -1812,7 +1812,7 @@ uses 
 
-class pyspark.ml.feature.ChiSqSelector(*args, **kwargs)[source]¶
+class pyspark.ml.feature.ChiSqSelector(numTopFeatures=50, 
featuresCol='features', outputCol=None, 
labelCol='label', selectorType='numTopFeatures', 
percentile=0.1, fpr=0.05)[source]¶
 
 Note
 Experimental
@@ -2387,7 +2387,7 @@ uses 
 
-class pyspark.ml.feature.CountVectorizer(*args, **kwargs)[source]¶
+class pyspark.ml.feature.CountVectorizer(minTF=1.0, minDF=1.0, 
vocabSize=262144, binary=False, inputCol=None, 
outputCol=None)[source]¶
 Extracts a vocabulary from document collections and generates a CountVectorizerModel.
  df = spark.createDataFrame(
 ...[(0, [a, b, c]), (1, [a, 
b, b, c, a])],
@@ -2940,7 +2940,7 @@ uses 
 
-class pyspark.ml.feature.DCT(*args, 
**kwargs)[source]¶
+class pyspark.ml.feature.DCT(inverse=False, inputCol=None, 
outputCol=None)[source]¶
 A feature transformer that takes the 1D discrete cosine transform
 of a real vector. No zero padding is performed on the input vector.
 It returns a real vector of the same length representing the DCT.
@@ -3218,7 +3218,7 @@ uses 
 
-class pyspark.ml.feature.ElementwiseProduct(*args, **kwargs)[source]¶
+class pyspark.ml.feature.ElementwiseProduct(scalingVec=None, inputCol=None, 
outputCol=None)[source]¶
 Outputs the Hadamard product (i.e., the element-wise product) of each 
input vector
 with a provided “weight” vector. In other words, it scales each column of 
the dataset
 by a scalar multiplier.
@@ -3489,7 +3489,7 @@ uses 
 
-class pyspark.ml.feature.HashingTF(*args, **kwargs)[source]¶
+class pyspark.ml.feature.HashingTF(numFeatures=262144, binary=False, 
inputCol=None, outputCol=None)[source]¶
 Maps a sequence of terms to their term frequencies using the hashing 
trick.
 Currently we use Austin Appleby’s MurmurHash 3 algorithm (MurmurHash3_x86_32)
 to calculate the hash code value for the term object.
@@ -3781,7 +3781,7 @@ uses 
 
-class pyspark.ml.feature.IDF(*args, 
**kwargs)[source]¶
+class pyspark.ml.feature.IDF(minDocFreq=0, inputCol=None, 
outputCol=None)[source]¶
 Compute the Inverse Document Frequency (IDF) given a collection of 
documents.
  from pyspark.ml.linalg import DenseVector
  df = spark.createDataFrame([(DenseVector([1.0, 2.0]),),
@@ -4260,7 +4260,7 @@ uses 
 
-class pyspark.ml.feature.IndexToString(*args, **kwargs)[source]¶
+class pyspark.ml.feature.IndexToString(inputCol=None, outputCol=None, 
labels=None)[source]¶
 A Transformer that maps a column of indices back to a 
new column of
 corresponding string values.
 The index-string mapping is either from the ML attributes of the input column,
@@ -4518,7 +4518,7 @@ uses 
 
-class pyspark.ml.feature.MaxAbsScaler(*args, **kwargs)[source]¶
+class pyspark.ml.feature.MaxAbsScaler(inputCol=None, outputCol=None)[source]¶
 Rescale each feature individually to range [-1, 1] by dividing through 
the largest maximum
 absolute value in each feature. It does not shift/center the data, and thus 
does not destroy
 any sparsity.
@@ -4976,7 +4976,7 @@ uses 
 
-class pyspark.ml.feature.MinMaxScaler(*args, **kwargs)[source]¶
+class pyspark.ml.feature.MinMaxScaler(min=0.0, max=1.0, 
inputCol=None, outputCol=None)[source]¶
 Rescale each feature individually to a common range [min, max] linearly 
using column summary
 statistics, which is also known as min-max normalization or Rescaling. The 
rescaled value for
 feature E is calculated as,
@@ -5502,7 +5502,7 @@ uses 
 
-class 

[2/6] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.3

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/da71a5c1/site/docs/2.1.3/api/python/pyspark.streaming.html
--
diff --git a/site/docs/2.1.3/api/python/pyspark.streaming.html b/site/docs/2.1.3/api/python/pyspark.streaming.html
index 18dcf63..37a5466 100644
--- a/site/docs/2.1.3/api/python/pyspark.streaming.html
+++ b/site/docs/2.1.3/api/python/pyspark.streaming.html
@@ -754,7 +754,8 @@ DStream’s batching interval
 
 
 class Java[source]¶
-
+Bases: object
+
 
 implements = 
['org.apache.spark.streaming.api.java.PythonStreamingListener']¶
 





[1/6] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.3

2018-07-03 Thread gurwls223
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 6bbac4966 -> da71a5c1d


http://git-wip-us.apache.org/repos/asf/spark-website/blob/da71a5c1/site/docs/2.1.3/api/python/searchindex.js
--
diff --git a/site/docs/2.1.3/api/python/searchindex.js b/site/docs/2.1.3/api/python/searchindex.js
index cfc547a..b57a96a 100644
--- a/site/docs/2.1.3/api/python/searchindex.js
+++ b/site/docs/2.1.3/api/python/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["index","pyspark","pyspark.ml","pyspark.mllib","pyspark.sql","pyspark.streaming"],envversion:52,filenames:["index.rst","pyspark.rst","pyspark.ml.rst","pyspark.mllib.rst","pyspark.sql.rst","pyspark.streaming.rst"],objects:{"":{pyspark:[1,0,0,"-"]},"pyspark.Accumulator":{add:[1,2,1,""],value:[1,3,1,""]},"pyspark.AccumulatorParam":{addInPlace:[1,2,1,""],zero:[1,2,1,""]},"pyspark.BasicProfiler":{profile:[1,2,1,""],stats:[1,2,1,""]},"pyspark.Broadcast":{destroy:[1,2,1,""],dump:[1,2,1,""],load:[1,2,1,""],unpersist:[1,2,1,""],value:[1,3,1,""]},"pyspark.MarshalSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.PickleSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.Profiler":{dump:[1,2,1,""],profile:[1,2,1,""],show:[1,2,1,""],stats:[1,2,1,""]},"pyspark.RDD":{aggregate:[1,2,1,""],aggregateByKey:[1,2,1,""],cache:[1,2,1,""],cartesian:[1,2,1,""],checkpoint:[1,2,1,""],coalesce:[1,2,1,""],cogroup:[1,2,1,""],collect:[1,2,1,""],collectAsMap:[1,2,1,""],combine
 
ByKey:[1,2,1,""],context:[1,3,1,""],count:[1,2,1,""],countApprox:[1,2,1,""],countApproxDistinct:[1,2,1,""],countByKey:[1,2,1,""],countByValue:[1,2,1,""],distinct:[1,2,1,""],filter:[1,2,1,""],first:[1,2,1,""],flatMap:[1,2,1,""],flatMapValues:[1,2,1,""],fold:[1,2,1,""],foldByKey:[1,2,1,""],foreach:[1,2,1,""],foreachPartition:[1,2,1,""],fullOuterJoin:[1,2,1,""],getCheckpointFile:[1,2,1,""],getNumPartitions:[1,2,1,""],getStorageLevel:[1,2,1,""],glom:[1,2,1,""],groupBy:[1,2,1,""],groupByKey:[1,2,1,""],groupWith:[1,2,1,""],histogram:[1,2,1,""],id:[1,2,1,""],intersection:[1,2,1,""],isCheckpointed:[1,2,1,""],isEmpty:[1,2,1,""],isLocallyCheckpointed:[1,2,1,""],join:[1,2,1,""],keyBy:[1,2,1,""],keys:[1,2,1,""],leftOuterJoin:[1,2,1,""],localCheckpoint:[1,2,1,""],lookup:[1,2,1,""],map:[1,2,1,""],mapPartitions:[1,2,1,""],mapPartitionsWithIndex:[1,2,1,""],mapPartitionsWithSplit:[1,2,1,""],mapValues:[1,2,1,""],max:[1,2,1,""],mean:[1,2,1,""],meanApprox:[1,2,1,""],min:[1,2,1,""],name:[1,2,1,""],parti
 
tionBy:[1,2,1,""],persist:[1,2,1,""],pipe:[1,2,1,""],randomSplit:[1,2,1,""],reduce:[1,2,1,""],reduceByKey:[1,2,1,""],reduceByKeyLocally:[1,2,1,""],repartition:[1,2,1,""],repartitionAndSortWithinPartitions:[1,2,1,""],rightOuterJoin:[1,2,1,""],sample:[1,2,1,""],sampleByKey:[1,2,1,""],sampleStdev:[1,2,1,""],sampleVariance:[1,2,1,""],saveAsHadoopDataset:[1,2,1,""],saveAsHadoopFile:[1,2,1,""],saveAsNewAPIHadoopDataset:[1,2,1,""],saveAsNewAPIHadoopFile:[1,2,1,""],saveAsPickleFile:[1,2,1,""],saveAsSequenceFile:[1,2,1,""],saveAsTextFile:[1,2,1,""],setName:[1,2,1,""],sortBy:[1,2,1,""],sortByKey:[1,2,1,""],stats:[1,2,1,""],stdev:[1,2,1,""],subtract:[1,2,1,""],subtractByKey:[1,2,1,""],sum:[1,2,1,""],sumApprox:[1,2,1,""],take:[1,2,1,""],takeOrdered:[1,2,1,""],takeSample:[1,2,1,""],toDebugString:[1,2,1,""],toLocalIterator:[1,2,1,""],top:[1,2,1,""],treeAggregate:[1,2,1,""],treeReduce:[1,2,1,""],union:[1,2,1,""],unpersist:[1,2,1,""],values:[1,2,1,""],variance:[1,2,1,""],zip:[1,2,1,""],zipWithIndex
 
:[1,2,1,""],zipWithUniqueId:[1,2,1,""]},"pyspark.SparkConf":{contains:[1,2,1,""],get:[1,2,1,""],getAll:[1,2,1,""],set:[1,2,1,""],setAll:[1,2,1,""],setAppName:[1,2,1,""],setExecutorEnv:[1,2,1,""],setIfMissing:[1,2,1,""],setMaster:[1,2,1,""],setSparkHome:[1,2,1,""],toDebugString:[1,2,1,""]},"pyspark.SparkContext":{PACKAGE_EXTENSIONS:[1,3,1,""],accumulator:[1,2,1,""],addFile:[1,2,1,""],addPyFile:[1,2,1,""],applicationId:[1,3,1,""],binaryFiles:[1,2,1,""],binaryRecords:[1,2,1,""],broadcast:[1,2,1,""],cancelAllJobs:[1,2,1,""],cancelJobGroup:[1,2,1,""],defaultMinPartitions:[1,3,1,""],defaultParallelism:[1,3,1,""],dump_profiles:[1,2,1,""],emptyRDD:[1,2,1,""],getConf:[1,2,1,""],getLocalProperty:[1,2,1,""],getOrCreate:[1,4,1,""],hadoopFile:[1,2,1,""],hadoopRDD:[1,2,1,""],newAPIHadoopFile:[1,2,1,""],newAPIHadoopRDD:[1,2,1,""],parallelize:[1,2,1,""],pickleFile:[1,2,1,""],range:[1,2,1,""],runJob:[1,2,1,""],sequenceFile:[1,2,1,""],setCheckpointDir:[1,2,1,""],setJobGroup:[1,2,1,""],setLocalPropert
 

[3/6] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.3

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/da71a5c1/site/docs/2.1.3/api/python/pyspark.sql.html
--
diff --git a/site/docs/2.1.3/api/python/pyspark.sql.html b/site/docs/2.1.3/api/python/pyspark.sql.html
index 329ea36..446f743 100644
--- a/site/docs/2.1.3/api/python/pyspark.sql.html
+++ b/site/docs/2.1.3/api/python/pyspark.sql.html
@@ -201,7 +201,7 @@ cluster.
 
 
 
-builder = 
pyspark.sql.session.Builder object¶
+builder = 
pyspark.sql.session.SparkSession.Builder object¶
 
 
 
@@ -270,22 +270,22 @@ omit the struct
 
  l = [(Alice, 1)]
  spark.createDataFrame(l).collect()
-[Row(_1=uAlice, _2=1)]
+[Row(_1=Alice, _2=1)]
  spark.createDataFrame(l, [name, age]).collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  d = [{name: Alice, age: 1}]
  spark.createDataFrame(d).collect()
-[Row(age=1, name=uAlice)]
+[Row(age=1, name=Alice)]
 
 
  rdd = sc.parallelize(l)
  spark.createDataFrame(rdd).collect()
-[Row(_1=uAlice, _2=1)]
+[Row(_1=Alice, _2=1)]
  df = spark.createDataFrame(rdd, [name, age])
  df.collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  from pyspark.sql import Row
@@ -293,7 +293,7 @@ omit the struct
  person = rdd.map(lambda r: Person(*r))
  df2 = spark.createDataFrame(person)
  df2.collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  from pyspark.sql.types import *
@@ -302,17 +302,17 @@ omit the struct
 ...StructField(age, IntegerType(), 
True)])
  df3 = spark.createDataFrame(rdd, schema)
  df3.collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  spark.createDataFrame(df.toPandas()).collect()  
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
  spark.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  
 [Row(0=1, 1=2)]
 
 
  spark.createDataFrame(rdd, a: string, 
b: int).collect()
-[Row(a=uAlice, b=1)]
+[Row(a=Alice, b=1)]
  rdd = rdd.map(lambda row: row[1])
  spark.createDataFrame(rdd, int).collect()
 [Row(value=1)]
@@ -439,7 +439,7 @@ as a streaming  df.createOrReplaceTempView(table1)
  df2 = spark.sql(SELECT 
field1 AS f1, field2 as f2 from table1)
  df2.collect()
-[Row(f1=1, f2=urow1), Row(f1=2, 
f2=urow2), Row(f1=3, f2=urow3)]
+[Row(f1=1, f2=row1), Row(f1=2, f2=row2), 
Row(f1=3, f2=row3)]
 
 
 
@@ -620,22 +620,22 @@ If it’s not a  l = [(Alice, 1)]
  sqlContext.createDataFrame(l).collect()
-[Row(_1=uAlice, _2=1)]
+[Row(_1=Alice, _2=1)]
  sqlContext.createDataFrame(l, [name, age]).collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  d = [{name: Alice, age: 1}]
  sqlContext.createDataFrame(d).collect()
-[Row(age=1, name=uAlice)]
+[Row(age=1, name=Alice)]
 
 
  rdd = sc.parallelize(l)
  sqlContext.createDataFrame(rdd).collect()
-[Row(_1=uAlice, _2=1)]
+[Row(_1=Alice, _2=1)]
  df = sqlContext.createDataFrame(rdd, [name, age])
  df.collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  from pyspark.sql import Row
@@ -643,7 +643,7 @@ If it’s not a  person = rdd.map(lambda r: Person(*r))
  df2 = sqlContext.createDataFrame(person)
  df2.collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  from pyspark.sql.types import *
@@ -652,17 +652,17 @@ If it’s not a ...StructField(age, IntegerType(), 
True)])
  df3 = sqlContext.createDataFrame(rdd, schema)
  df3.collect()
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
 
 
  sqlContext.createDataFrame(df.toPandas()).collect()  
-[Row(name=uAlice, age=1)]
+[Row(name=Alice, age=1)]
  sqlContext.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  
 [Row(0=1, 1=2)]
 
 
  sqlContext.createDataFrame(rdd, a: string, 
b: int).collect()
-[Row(a=uAlice, b=1)]
+[Row(a=Alice, b=1)]
  rdd = rdd.map(lambda row: row[1])
  sqlContext.createDataFrame(rdd, int).collect()
 [Row(value=1)]
@@ -721,12 +721,12 @@ created external table.
 defaultValue. If the key is not set and defaultValue is None, return
 the system default value.
  sqlContext.getConf(spark.sql.shuffle.partitions)
-u200
+200
  sqlContext.getConf(spark.sql.shuffle.partitions, u10)
-u10
+10
  sqlContext.setConf(spark.sql.shuffle.partitions, u50)
  sqlContext.getConf(spark.sql.shuffle.partitions, u10)
-u50
+50
 
 
 
@@ -880,7 +880,7 @@ be done.  For any other return type, the produced object 
must match the specifie
 
  sqlContext.registerFunction(stringLengthString, lambda x: len(x))
  sqlContext.sql(SELECT stringLengthString(test)).collect()
-[Row(stringLengthString(test)=u4)]
+[Row(stringLengthString(test)=4)]
 
 
  from pyspark.sql.types import IntegerType
@@ -948,7 +948,7 @@ When the return type is not specified we would infer it via 
reflection.
  sqlContext.registerDataFrameAsTable(df, table1)
  df2 = sqlContext.sql(SELECT field1 AS f1, field2 as f2 from 
table1)
  df2.collect()
-[Row(f1=1, f2=urow1), Row(f1=2, 
f2=urow2), Row(f1=3, f2=urow3)]
+[Row(f1=1, f2=row1), Row(f1=2, f2=row2), 
Row(f1=3, f2=row3)]
 
 
 
@@ -1039,7 +1039,7 @@ When the return type is not specified we would infer it 
via 

[4/6] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.3

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/da71a5c1/site/docs/2.1.3/api/python/pyspark.mllib.html
--
diff --git a/site/docs/2.1.3/api/python/pyspark.mllib.html b/site/docs/2.1.3/api/python/pyspark.mllib.html
index 705c126..fcb1b09 100644
--- a/site/docs/2.1.3/api/python/pyspark.mllib.html
+++ b/site/docs/2.1.3/api/python/pyspark.mllib.html
@@ -2624,7 +2624,7 @@ Compositionality.
 Querying for synonyms of a word will not return that word:
  syms = model.findSynonyms(a, 2)
  [s[0] for s in syms]
-[ub, uc]
+[b, c]
 
 
 But querying for synonyms of a vector may return the word whose
@@ -2632,7 +2632,7 @@ representation is that vector:
  vec = model.transform(a)
  syms = model.findSynonyms(vec, 2)
  [s[0] for s in syms]
-[ua, ub]
+[a, b]
 
 
  import os, 
tempfile
@@ -2643,7 +2643,7 @@ representation is that vector:
 True
  syms = sameModel.findSynonyms(a, 
2)
  [s[0] for s in syms]
-[ub, uc]
+[b, c]
  from shutil import rmtree
  try:
 ... rmtree(path)
@@ -3034,7 +3034,7 @@ using the Parallel FP-Growth algorithm.
  rdd = sc.parallelize(data, 2)
  model = FPGrowth.train(rdd, 0.6, 2)
  sorted(model.freqItemsets().collect())
-[FreqItemset(items=[ua], freq=4), 
FreqItemset(items=[uc], freq=3), ...
+[FreqItemset(items=[a], freq=4), 
FreqItemset(items=[c], freq=3), ...
  model_path = temp_path + 
/fpm
  model.save(sc, model_path)
  sameModel = FPGrowthModel.load(sc, model_path)
@@ -3132,7 +3132,7 @@ another iteration of distributed prefix growth is run.
  rdd = sc.parallelize(data, 2)
  model = PrefixSpan.train(rdd)
  sorted(model.freqSequences().collect())
-[FreqSequence(sequence=[[ua]], freq=3), 
FreqSequence(sequence=[[ua], [ua]], freq=1), ...
+[FreqSequence(sequence=[[a]], freq=3), 
FreqSequence(sequence=[[a], [a]], freq=1), ...
 
 
 
@@ -4884,7 +4884,7 @@ distribution with the input mean.
 
 
 
-static exponentialVectorRDD(sc, *a, **kw)[source]¶
+static exponentialVectorRDD(sc, mean, numRows, 
numCols, numPartitions=None, seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the Exponential distribution with the input mean.
 
@@ -4970,7 +4970,7 @@ distribution with the input shape and scale.
 
 
 
-static gammaVectorRDD(sc, *a, **kw)[source]¶
+static gammaVectorRDD(sc, shape, scale, 
numRows, numCols, numPartitions=None, 
seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the Gamma distribution.
 
@@ -5060,7 +5060,7 @@ distribution with the input mean and standard 
distribution.
 
 
 
-static logNormalVectorRDD(sc, *a, **kw)[source]¶
+static logNormalVectorRDD(sc, mean, std, 
numRows, numCols, numPartitions=None, 
seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the log normal distribution.
 
@@ -5146,7 +5146,7 @@ to some other normal N(mean, sigma^2), use
 
 
 
-static normalVectorRDD(sc, *a, **kw)[source]¶
+static normalVectorRDD(sc, numRows, numCols, 
numPartitions=None, seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the standard normal distribution.
 
@@ -5224,7 +5224,7 @@ distribution with the input mean.
 
 
 
-static poissonVectorRDD(sc, *a, **kw)[source]¶
+static poissonVectorRDD(sc, mean, numRows, 
numCols, numPartitions=None, seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the Poisson distribution with the input mean.
 
@@ -5308,7 +5308,7 @@ to U(a, b), use
 
 
 
-static uniformVectorRDD(sc, *a, **kw)[source]¶
+static uniformVectorRDD(sc, numRows, numCols, 
numPartitions=None, seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the uniform distribution U(0.0, 1.0).
 
@@ -6579,9 +6579,9 @@ of freedom, p-value, the method used, and the null 
hypothesis.
  print(round(pearson.pValue, 4))
 0.8187
  pearson.method
-upearson
+pearson
  pearson.nullHypothesis
-uobserved follows the same distribution as 
expected.
+observed follows the same distribution as 
expected.
 
 
  observed = Vectors.dense([21, 38, 43, 80])
@@ -6761,7 +6761,7 @@ the method used, and the null hypothesis.
  print(round(ksmodel.statistic, 3))
 0.175
  ksmodel.nullHypothesis
-uSample follows theoretical distribution
+Sample follows theoretical distribution
 
 
  data = sc.parallelize([2.0, 3.0, 4.0])





[12/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/ml/param/shared.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/ml/param/shared.html b/site/docs/2.1.2/api/python/_modules/pyspark/ml/param/shared.html
index f2d428f..60e1169 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/ml/param/shared.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/ml/param/shared.html
@@ -77,7 +77,7 @@
 maxIter = Param(Params._dummy(), maxIter, max number of iterations (= 0)., typeConverter=TypeConverters.toInt)
 
 def __init__(self):
-super(HasMaxIter, self).__init__()
+super(HasMaxIter, self).__init__()
 
 def setMaxIter(self, value):
 
@@ -100,7 +100,7 @@
 regParam = Param(Params._dummy(), regParam, regularization parameter (= 0)., typeConverter=TypeConverters.toFloat)
 
 def __init__(self):
-super(HasRegParam, self).__init__()
+super(HasRegParam, self).__init__()
 
 def setRegParam(self, value):
 
@@ -123,7 +123,7 @@
 featuresCol = Param(Params._dummy(), featuresCol, features column name., 
typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasFeaturesCol, self).__init__()
+super(HasFeaturesCol, self).__init__()
 self._setDefault(featuresCol=features)
 
 def setFeaturesCol(self, value):
@@ -147,7 +147,7 @@
 labelCol = Param(Params._dummy(), labelCol, label column name., typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasLabelCol, self).__init__()
+super(HasLabelCol, self).__init__()
 self._setDefault(labelCol=label)
 
 def setLabelCol(self, value):
@@ -171,7 +171,7 @@
 predictionCol = Param(Params._dummy(), predictionCol, prediction column name., 
typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasPredictionCol, self).__init__()
+super(HasPredictionCol, self).__init__()
 self._setDefault(predictionCol=prediction)
 
 def setPredictionCol(self, value):
@@ -195,7 +195,7 @@
 probabilityCol = Param(Params._dummy(), probabilityCol, Column name for predicted class conditional probabilities. 
Note: Not all models output well-calibrated probability estimates! These 
probabilities should be treated as confidences, not precise 
probabilities., typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasProbabilityCol, self).__init__()
+super(HasProbabilityCol, self).__init__()
 self._setDefault(probabilityCol=probability)
 
 def setProbabilityCol(self, value):
@@ -219,7 +219,7 @@
 rawPredictionCol = Param(Params._dummy(), rawPredictionCol, raw prediction (a.k.a. confidence) column 
name., typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasRawPredictionCol, self).__init__()
+super(HasRawPredictionCol, self).__init__()
 self._setDefault(rawPredictionCol=rawPrediction)
 
 def setRawPredictionCol(self, value):
@@ -243,7 +243,7 @@
 inputCol = Param(Params._dummy(), inputCol, input column name., typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasInputCol, self).__init__()
+super(HasInputCol, self).__init__()
 
 def setInputCol(self, value):
 
@@ -266,7 +266,7 @@
 inputCols = Param(Params._dummy(), inputCols, input column names., typeConverter=TypeConverters.toListString)
 
 def __init__(self):
-super(HasInputCols, self).__init__()
+super(HasInputCols, self).__init__()
 
 def setInputCols(self, value):
 
@@ -289,7 +289,7 @@
 outputCol = Param(Params._dummy(), outputCol, output column name., typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(HasOutputCol, self).__init__()
+super(HasOutputCol, self).__init__()
 self._setDefault(outputCol=self.uid + __output)
 
 def setOutputCol(self, value):
@@ -313,7 +313,7 @@
 numFeatures = Param(Params._dummy(), numFeatures, number of features., typeConverter=TypeConverters.toInt)
 
 def __init__(self):
-super(HasNumFeatures, self).__init__()
+super(HasNumFeatures, self).__init__()
 
 def setNumFeatures(self, value):
 
@@ -336,7 +336,7 @@
 checkpointInterval = Param(Params._dummy(), checkpointInterval, set checkpoint interval (= 1) or disable checkpoint (-1). 
E.g. 10 means that the cache will get checkpointed every 10 
iterations., typeConverter=TypeConverters.toInt)
 
 def __init__(self):
-super(HasCheckpointInterval, self).__init__()
+super(HasCheckpointInterval, self).__init__()
 
 def setCheckpointInterval(self, value):
 
@@ -359,8 +359,8 @@
 seed = Param(Params._dummy(), seed, random seed., typeConverter=TypeConverters.toInt)
 
 def __init__(self):
-super(HasSeed, 
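For orientation, the mixins being re-rendered in this file all follow the same shape; a simplified sketch of one of them (not the exact generated source) is:

    from pyspark.ml.param import Param, Params, TypeConverters

    class HasMaxIter(Params):
        """Mixin for param maxIter: max number of iterations (>= 0)."""

        maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0).",
                        typeConverter=TypeConverters.toInt)

        def __init__(self):
            super(HasMaxIter, self).__init__()

        def setMaxIter(self, value):
            return self._set(maxIter=value)

        def getMaxIter(self):
            return self.getOrDefault(self.maxIter)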

[07/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/sql/functions.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/sql/functions.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/sql/functions.html
index 61efde4..3c07c92 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/sql/functions.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/sql/functions.html
@@ -86,8 +86,8 @@
 sc = SparkContext._active_spark_context
 jc = getattr(sc._jvm.functions, name)(col._jc if isinstance(col, Column) else col)
 return Column(jc)
-_.__name__ = name
-_.__doc__ = doc
+_.__name__ = name
+_.__doc__ = doc
 return _
 
 
@@ -99,8 +99,8 @@
 jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else 
float(col1),
   col2._jc if isinstance(col2, Column) else 
float(col2))
 return Column(jc)
-_.__name__ = name
-_.__doc__ = doc
+_.__name__ = name
+_.__doc__ = doc
 return _
 
 
@@ -110,8 +110,8 @@
 sc = SparkContext._active_spark_context
 jc = getattr(sc._jvm.functions, name)()
 return Column(jc)
-_.__name__ = name
-_.__doc__ = Window 
function:  + doc
+_.__name__ = name
+_.__doc__ = Window 
function:  + doc
 return _
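The wrappers above all rely on copying __name__ and __doc__ onto the generated function so that Sphinx picks up a readable name and docstring. A stripped-down sketch of the same pattern (function and variable names here are illustrative, and the JVM call is replaced by a placeholder):

    def _make_sql_function(name, doc=""):
        def _(col):
            # Real code would call getattr(sc._jvm.functions, name)(col) here;
            # this placeholder only demonstrates the wrapping.
            return ("jvm-call", name, col)
        _.__name__ = name
        _.__doc__ = doc
        return _

    upper = _make_sql_function("upper", "Converts a string column to upper case.")
    print(upper.__name__, "-", upper.__doc__)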
 
 
@@ -1885,7 +1885,7 @@
 jdt = spark._jsparkSession.parseDataType(self.returnType.json())
 if name is None:
 f = self.func
-name = f.__name__ if hasattr(f, __name__) else f.__class__.__name__
+name = f.__name__ 
if hasattr(f, __name__) else f.__class__.__name__
 judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction(
 name, wrapped_func, jdt)
 return judf
@@ -1929,7 +1929,7 @@
 import doctest
 from pyspark.sql import Row, 
SparkSession
 import pyspark.sql.functions
-globs = pyspark.sql.functions.__dict__.copy()
+globs = pyspark.sql.functions.__dict__.copy()
 spark = SparkSession.builder\
 .master(local[4])\
 .appName(sql.functions tests)\
@@ -1946,7 +1946,7 @@
 exit(-1)
 
 
-if __name__ == __main__:
+if __name__ == __main__:
 _test()
 
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/sql/group.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/sql/group.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/sql/group.html
index f6687c9..c056446 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/sql/group.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/sql/group.html
@@ -74,21 +74,21 @@
 
 def dfapi(f):
 def _api(self):
-name = f.__name__
+name = f.__name__
 jdf = getattr(self._jgd, name)()
 return DataFrame(jdf, self.sql_ctx)
-_api.__name__ = f.__name__
-_api.__doc__ = f.__doc__
+_api.__name__ = f.__name__
+_api.__doc__ = f.__doc__
 return _api
 
 
 def df_varargs_api(f):
 def _api(self, *cols):
-name = f.__name__
+name = f.__name__
 jdf = getattr(self._jgd, name)(_to_seq(self.sql_ctx._sc, cols))
 return DataFrame(jdf, self.sql_ctx)
-_api.__name__ = f.__name__
-_api.__doc__ = f.__doc__
+_api.__name__ = f.__name__
+_api.__doc__ = f.__doc__
 return _api
 
 
@@ -247,7 +247,7 @@
 import doctest
 from pyspark.sql import Row, 
SparkSession
 import pyspark.sql.group
-globs = pyspark.sql.group.__dict__.copy()
+globs = pyspark.sql.group.__dict__.copy()
 spark = SparkSession.builder\
 .master(local[4])\
 .appName(sql.group tests)\
@@ -273,7 +273,7 @@
 exit(-1)
 
 
-if __name__ == __main__:
+if __name__ == __main__:
 _test()
 
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/sql/readwriter.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/sql/readwriter.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/sql/readwriter.html
index bac533f..49f98d9 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/sql/readwriter.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/sql/readwriter.html
@@ -824,7 +824,7 @@
 
 os.chdir(os.environ[SPARK_HOME])
 
-globs = pyspark.sql.readwriter.__dict__.copy()
+globs = pyspark.sql.readwriter.__dict__.copy()
 sc = SparkContext(local[4], PythonTest)
 try:
 spark = SparkSession.builder.enableHiveSupport().getOrCreate()
@@ -844,7 +844,7 @@
 exit(-1)
 
 
-if __name__ == __main__:
+if __name__ == __main__:
 _test()
 
 


[11/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/ml/regression.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/ml/regression.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/ml/regression.html
index f3d5915..4f73b9e 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/ml/regression.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/ml/regression.html
@@ -145,24 +145,24 @@
 
 @keyword_only
 def __init__(self, featuresCol=features, labelCol=label, predictionCol=prediction,
- maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
  standardization=True, solver=auto, weightCol=None, aggregationDepth=2):
 
 __init__(self, featuresCol=features, 
labelCol=label, predictionCol=prediction, \
  maxIter=100, regParam=0.0, 
elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
  standardization=True, 
solver=auto, weightCol=None, aggregationDepth=2)
 
-super(LinearRegression, self).__init__()
+super(LinearRegression, self).__init__()
 self._java_obj = self._new_java_obj(
 org.apache.spark.ml.regression.LinearRegression, self.uid)
-self._setDefault(maxIter=100, regParam=0.0, tol=1e-6)
+self._setDefault(maxIter=100, regParam=0.0, tol=1e-6)
 kwargs = self._input_kwargs
 self.setParams(**kwargs)
 
 @keyword_only
 @since(1.4.0)
 [docs]
def setParams(self, featuresCol=features, labelCol=label, predictionCol=prediction,
-  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
   standardization=True, solver=auto, weightCol=None, aggregationDepth=2):
 
 setParams(self, featuresCol=features, 
labelCol=label, predictionCol=prediction, \
@@ -213,7 +213,7 @@
 return LinearRegressionTrainingSummary(java_lrt_summary)
 else:
 raise RuntimeError(No 
training summary available for this %s %
-   self.__class__.__name__)
+   self.__class__.__name__)
 
 @property
 @since(2.0.0)
@@ -508,7 +508,7 @@
 __init__(self, featuresCol=features, 
labelCol=label, predictionCol=prediction, \
  weightCol=None, isotonic=True, 
featureIndex=0):
 
-super(IsotonicRegression, self).__init__()
+super(IsotonicRegression, self).__init__()
 self._java_obj = self._new_java_obj(
 org.apache.spark.ml.regression.IsotonicRegression, self.uid)
 self._setDefault(isotonic=True, featureIndex=0)
@@ -589,7 +589,7 @@
 typeConverter=TypeConverters.toFloat)
 
 def __init__(self):
-super(TreeEnsembleParams, self).__init__()
+super(TreeEnsembleParams, self).__init__()
 
 @since(1.4.0)
 def setSubsamplingRate(self, value):
@@ -618,7 +618,7 @@
  , .join(supportedImpurities), typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(TreeRegressorParams, self).__init__()
+super(TreeRegressorParams, self).__init__()
 
 @since(1.4.0)
 def setImpurity(self, value):
@@ -650,7 +650,7 @@
   typeConverter=TypeConverters.toString)
 
 def __init__(self):
-super(RandomForestParams, self).__init__()
+super(RandomForestParams, self).__init__()
 
 @since(1.4.0)
 def setNumTrees(self, value):
@@ -746,7 +746,7 @@
  maxMemoryInMB=256, cacheNodeIds=False, 
checkpointInterval=10, \
  impurity=variance, seed=None, 
varianceCol=None)
 
-super(DecisionTreeRegressor, self).__init__()
+super(DecisionTreeRegressor, self).__init__()
 self._java_obj = self._new_java_obj(
 org.apache.spark.ml.regression.DecisionTreeRegressor, self.uid)
 self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
@@ -936,7 +936,7 @@
  impurity=variance, 
subsamplingRate=1.0, seed=None, numTrees=20, \
  
featureSubsetStrategy=auto)
 
-super(RandomForestRegressor, self).__init__()
+super(RandomForestRegressor, self).__init__()
 self._java_obj = self._new_java_obj(
 org.apache.spark.ml.regression.RandomForestRegressor, self.uid)
 self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
@@ -1064,7 +1064,7 @@
  checkpointInterval=10, 
lossType=squared, maxIter=20, stepSize=0.1, seed=None, \
  impurity=variance)
 
-super(GBTRegressor, 
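As a sanity check of the keyword-only constructors re-rendered in this file, a minimal usage sketch (it assumes an active SparkSession `spark`; the toy data is illustrative):

    from pyspark.ml.linalg import Vectors
    from pyspark.ml.regression import LinearRegression

    train = spark.createDataFrame(
        [(1.0, Vectors.dense(0.0)), (2.0, Vectors.dense(1.0))],
        ["label", "features"])

    lr = LinearRegression(maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6)
    model = lr.fit(train)
    print(model.coefficients, model.intercept)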

[05/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/pyspark.ml.html
--
diff --git a/site/docs/2.1.2/api/python/pyspark.ml.html 
b/site/docs/2.1.2/api/python/pyspark.ml.html
index c7034f0..557d570 100644
--- a/site/docs/2.1.2/api/python/pyspark.ml.html
+++ b/site/docs/2.1.2/api/python/pyspark.ml.html
@@ -567,7 +567,7 @@ uses dir()
 
-class pyspark.ml.Pipeline(*args, 
**kwargs)[source]¶
+class pyspark.ml.Pipeline(stages=None)[source]¶
 A simple pipeline, which acts as an estimator. A Pipeline consists
 of a sequence of stages, each of which is either an
 Estimator or a Transformer. When
@@ -1250,7 +1250,7 @@ uses dir()pyspark.ml.feature module¶
 
 
-class pyspark.ml.feature.Binarizer(*args, **kwargs)[source]¶
+class pyspark.ml.feature.Binarizer(threshold=0.0, inputCol=None, 
outputCol=None)[source]¶
 Binarize a column of continuous features given a threshold.
  df = spark.createDataFrame([(0.5,)], [values])
  binarizer = Binarizer(threshold=1.0, inputCol=values, outputCol=features)
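With the constructor signatures now spelled out (Pipeline(stages=None), Binarizer(threshold=0.0, inputCol=None, outputCol=None), and so on), construction reads as plain keyword calls; a short sketch, assuming an active SparkSession `spark`:

    from pyspark.ml import Pipeline
    from pyspark.ml.feature import Binarizer

    df = spark.createDataFrame([(0.5,), (1.2,)], ["values"])
    binarizer = Binarizer(threshold=1.0, inputCol="values", outputCol="features")
    pipeline = Pipeline(stages=[binarizer])
    model = pipeline.fit(df)
    model.transform(df).show()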
@@ -1520,7 +1520,7 @@ uses dir()
 
-class pyspark.ml.feature.Bucketizer(*args, **kwargs)[source]¶
+class pyspark.ml.feature.Bucketizer(splits=None, inputCol=None, 
outputCol=None, handleInvalid='error')[source]¶
 Maps a column of continuous features to a column of feature buckets.
  values = [(0.1,), (0.4,), (1.2,), (1.5,), (float(nan),), (float(nan),)]
  df = spark.createDataFrame(values, [values])
@@ -1824,7 +1824,7 @@ uses dir()
 
-class pyspark.ml.feature.ChiSqSelector(*args, **kwargs)[source]¶
+class pyspark.ml.feature.ChiSqSelector(numTopFeatures=50, 
featuresCol='features', outputCol=None, 
labelCol='label', selectorType='numTopFeatures', 
percentile=0.1, fpr=0.05)[source]¶
 
 Note
 Experimental
@@ -2399,7 +2399,7 @@ uses dir()
 
-class pyspark.ml.feature.CountVectorizer(*args, **kwargs)[source]¶
+class pyspark.ml.feature.CountVectorizer(minTF=1.0, minDF=1.0, 
vocabSize=262144, binary=False, inputCol=None, 
outputCol=None)[source]¶
 Extracts a vocabulary from document collections and generates a CountVectorizerModel.
  df = spark.createDataFrame(
 ...[(0, [a, b, c]), (1, [a, 
b, b, c, a])],
@@ -2952,7 +2952,7 @@ uses dir()
 
-class pyspark.ml.feature.DCT(*args, 
**kwargs)[source]¶
+class pyspark.ml.feature.DCT(inverse=False, inputCol=None, 
outputCol=None)[source]¶
 A feature transformer that takes the 1D discrete cosine transform
 of a real vector. No zero padding is performed on the input vector.
 It returns a real vector of the same length representing the DCT.
@@ -3230,7 +3230,7 @@ uses dir()
 
-class pyspark.ml.feature.ElementwiseProduct(*args, **kwargs)[source]¶
+class pyspark.ml.feature.ElementwiseProduct(scalingVec=None, inputCol=None, 
outputCol=None)[source]¶
 Outputs the Hadamard product (i.e., the element-wise product) of each 
input vector
 with a provided weight vector. In other words, it scales each 
column of the dataset
 by a scalar multiplier.
@@ -3501,7 +3501,7 @@ uses dir()
 
-class pyspark.ml.feature.HashingTF(*args, **kwargs)[source]¶
+class pyspark.ml.feature.HashingTF(numFeatures=262144, binary=False, 
inputCol=None, outputCol=None)[source]¶
 Maps a sequence of terms to their term frequencies using the hashing 
trick.
 Currently we use Austin Applebys MurmurHash 3 algorithm 
(MurmurHash3_x86_32)
 to calculate the hash code value for the term object.
@@ -3793,7 +3793,7 @@ uses dir()
 
-class pyspark.ml.feature.IDF(*args, 
**kwargs)[source]¶
+class pyspark.ml.feature.IDF(minDocFreq=0, inputCol=None, 
outputCol=None)[source]¶
 Compute the Inverse Document Frequency (IDF) given a collection of 
documents.
  from pyspark.ml.linalg import DenseVector
  df = spark.createDataFrame([(DenseVector([1.0, 2.0]),),
@@ -4272,7 +4272,7 @@ uses dir()
 
-class pyspark.ml.feature.IndexToString(*args, **kwargs)[source]¶
+class pyspark.ml.feature.IndexToString(inputCol=None, outputCol=None, 
labels=None)[source]¶
 A Transformer that maps a column of indices back to a 
new column of
 corresponding string values.
 The index-string mapping is either from the ML attributes of the input column,
@@ -4530,7 +4530,7 @@ uses dir()
 
-class pyspark.ml.feature.MaxAbsScaler(*args, **kwargs)[source]¶
+class pyspark.ml.feature.MaxAbsScaler(inputCol=None, outputCol=None)[source]¶
 Rescale each feature individually to range [-1, 1] by dividing through 
the largest maximum
 absolute value in each feature. It does not shift/center the data, and thus 
does not destroy
 any sparsity.
@@ -4988,7 +4988,7 @@ uses dir()
 
-class pyspark.ml.feature.MinMaxScaler(*args, **kwargs)[source]¶
+class pyspark.ml.feature.MinMaxScaler(min=0.0, max=1.0, 
inputCol=None, outputCol=None)[source]¶
 Rescale each feature individually to a common range [min, max] linearly 
using column summary
 statistics, which is also known as min-max normalization or Rescaling. The 
rescaled value for
 feature E is calculated as,
@@ 

[03/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/pyspark.sql.html
--
diff --git a/site/docs/2.1.2/api/python/pyspark.sql.html 
b/site/docs/2.1.2/api/python/pyspark.sql.html
index e2fbad9..5220468 100644
--- a/site/docs/2.1.2/api/python/pyspark.sql.html
+++ b/site/docs/2.1.2/api/python/pyspark.sql.html
@@ -213,7 +213,7 @@ cluster.
 
 
 
-SparkSession.builder = 
pyspark.sql.session.Builder object¶
+SparkSession.builder = 
pyspark.sql.session.SparkSession.Builder object¶
 
 
 
@@ -282,22 +282,22 @@ omit the struct<
 
  l = [(Alice, 1)]
  spark.createDataFrame(l).collect()
-[Row(_1=u'Alice', _2=1)]
+[Row(_1='Alice', _2=1)]
 spark.createDataFrame(l, ['name', 'age']).collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  d = [{name: Alice, age: 1}]
  spark.createDataFrame(d).collect()
-[Row(age=1, name=u'Alice')]
+[Row(age=1, name='Alice')]
 
 
  rdd = sc.parallelize(l)
  spark.createDataFrame(rdd).collect()
-[Row(_1=u'Alice', _2=1)]
+[Row(_1='Alice', _2=1)]
 df = spark.createDataFrame(rdd, ['name', 'age'])
 df.collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  from pyspark.sql import Row
@@ -305,7 +305,7 @@ omit the struct<
  person = rdd.map(lambda r: Person(*r))
  df2 = spark.createDataFrame(person)
  df2.collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  from pyspark.sql.types import *
@@ -314,17 +314,17 @@ omit the struct<
 ...StructField(age, IntegerType(), 
True)])
  df3 = spark.createDataFrame(rdd, schema)
  df3.collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  spark.createDataFrame(df.toPandas()).collect()  
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
  spark.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  
 [Row(0=1, 1=2)]
 
 
  spark.createDataFrame(rdd, a: string, b: int).collect()
-[Row(a=u'Alice', b=1)]
+[Row(a='Alice', b=1)]
  rdd = rdd.map(lambda row: row[1])
  spark.createDataFrame(rdd, int).collect()
 [Row(value=1)]
@@ -451,7 +451,7 @@ as a streaming  df.createOrReplaceTempView(table1)
  df2 = spark.sql(SELECT 
field1 AS f1, field2 as f2 from table1)
  df2.collect()
-[Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
+[Row(f1=1, f2='row1'), Row(f1=2, f2='row2'), Row(f1=3, f2='row3')]
 
 
 
@@ -632,22 +632,22 @@ If its not a  l = [(Alice, 1)]
  sqlContext.createDataFrame(l).collect()
-[Row(_1=u'Alice', _2=1)]
+[Row(_1='Alice', _2=1)]
 sqlContext.createDataFrame(l, ['name', 'age']).collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  d = [{name: Alice, age: 1}]
  sqlContext.createDataFrame(d).collect()
-[Row(age=1, name=u'Alice')]
+[Row(age=1, name='Alice')]
 
 
  rdd = sc.parallelize(l)
  sqlContext.createDataFrame(rdd).collect()
-[Row(_1=u'Alice', _2=1)]
+[Row(_1='Alice', _2=1)]
 df = sqlContext.createDataFrame(rdd, ['name', 'age'])
 df.collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  from pyspark.sql import Row
@@ -655,7 +655,7 @@ If its not a  person = rdd.map(lambda r: Person(*r))
  df2 = sqlContext.createDataFrame(person)
  df2.collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  from pyspark.sql.types import *
@@ -664,17 +664,17 @@ If its not a ...StructField(age, IntegerType(), 
True)])
  df3 = sqlContext.createDataFrame(rdd, schema)
  df3.collect()
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
 
 
  sqlContext.createDataFrame(df.toPandas()).collect()  
-[Row(name=u'Alice', age=1)]
+[Row(name='Alice', age=1)]
  sqlContext.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  
 [Row(0=1, 1=2)]
 
 
  sqlContext.createDataFrame(rdd, a: string, b: int).collect()
-[Row(a=u'Alice', b=1)]
+[Row(a='Alice', b=1)]
  rdd = rdd.map(lambda row: row[1])
  sqlContext.createDataFrame(rdd, int).collect()
 [Row(value=1)]
@@ -733,12 +733,12 @@ created external table.
 defaultValue. If the key is not set and defaultValue is None, return
 the system default value.
  sqlContext.getConf(spark.sql.shuffle.partitions)
-u'200'
- sqlContext.getConf("spark.sql.shuffle.partitions", u"10")
-u'10'
- sqlContext.setConf("spark.sql.shuffle.partitions", u"50")
- sqlContext.getConf("spark.sql.shuffle.partitions", u"10")
-u'50'
+'200'
+ sqlContext.getConf("spark.sql.shuffle.partitions", u"10")
+'10'
+ sqlContext.setConf("spark.sql.shuffle.partitions", u"50")
+ sqlContext.getConf("spark.sql.shuffle.partitions", u"10")
+'50'
 
 
 
@@ -892,7 +892,7 @@ be done.  For any other return type, the produced object 
must match the specifie
 
  sqlContext.registerFunction(stringLengthString, lambda x: len(x))
  sqlContext.sql(SELECT stringLengthString(test)).collect()
-[Row(stringLengthString(test)=u'4')]
+[Row(stringLengthString(test)='4')]
 
 
  from pyspark.sql.types import IntegerType
@@ -960,7 +960,7 @@ When the return type is not specified we would infer it via 
reflection.
  sqlContext.registerDataFrameAsTable(df, table1)
  df2 = sqlContext.sql(SELECT field1 AS f1, field2 as f2 from 
table1)
  df2.collect()
-[Row(f1=1, f2=urow1), Row(f1=2, 

[14/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
Fix signature description broken in PySpark API documentation in 2.1.2


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/6bbac496
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/6bbac496
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/6bbac496

Branch: refs/heads/asf-site
Commit: 6bbac4966e79156af1e57d6c1da013ee9e828ea6
Parents: 7751277
Author: hyukjinkwon 
Authored: Tue Jul 3 01:32:55 2018 +0800
Committer: hyukjinkwon 
Committed: Tue Jul 3 01:32:55 2018 +0800

--
 .../python/_modules/pyspark/accumulators.html   |   2 +-
 .../api/python/_modules/pyspark/broadcast.html  |   6 +-
 .../2.1.2/api/python/_modules/pyspark/conf.html |   4 +-
 .../api/python/_modules/pyspark/context.html|   8 +-
 .../api/python/_modules/pyspark/files.html  |  10 +-
 .../_modules/pyspark/ml/classification.html |  54 ++--
 .../python/_modules/pyspark/ml/clustering.html  |  24 +-
 .../python/_modules/pyspark/ml/evaluation.html  |  10 +-
 .../api/python/_modules/pyspark/ml/feature.html |  60 ++---
 .../api/python/_modules/pyspark/ml/linalg.html  |  42 +--
 .../api/python/_modules/pyspark/ml/param.html   |   8 +-
 .../_modules/pyspark/ml/param/shared.html   |  52 ++--
 .../python/_modules/pyspark/ml/pipeline.html|  28 +-
 .../_modules/pyspark/ml/recommendation.html |   6 +-
 .../python/_modules/pyspark/ml/regression.html  |  46 ++--
 .../api/python/_modules/pyspark/ml/tuning.html  |  12 +-
 .../api/python/_modules/pyspark/ml/util.html|  30 +--
 .../api/python/_modules/pyspark/ml/wrapper.html |   8 +-
 .../_modules/pyspark/mllib/classification.html  |  28 +-
 .../_modules/pyspark/mllib/clustering.html  |  32 +--
 .../python/_modules/pyspark/mllib/common.html   |  12 +-
 .../_modules/pyspark/mllib/evaluation.html  |  14 +-
 .../python/_modules/pyspark/mllib/feature.html  |   4 +-
 .../api/python/_modules/pyspark/mllib/fpm.html  |  12 +-
 .../python/_modules/pyspark/mllib/linalg.html   |  42 +--
 .../pyspark/mllib/linalg/distributed.html   |   4 +-
 .../python/_modules/pyspark/mllib/random.html   |   2 +-
 .../_modules/pyspark/mllib/recommendation.html  |  18 +-
 .../_modules/pyspark/mllib/regression.html  |  22 +-
 .../api/python/_modules/pyspark/mllib/tree.html |  40 +--
 .../api/python/_modules/pyspark/mllib/util.html |  20 +-
 .../api/python/_modules/pyspark/profiler.html   |   4 +-
 .../2.1.2/api/python/_modules/pyspark/rdd.html  |  16 +-
 .../python/_modules/pyspark/serializers.html|  70 ++---
 .../api/python/_modules/pyspark/sql/column.html |  68 ++---
 .../python/_modules/pyspark/sql/context.html|  22 +-
 .../python/_modules/pyspark/sql/dataframe.html  |  26 +-
 .../python/_modules/pyspark/sql/functions.html  |  18 +-
 .../api/python/_modules/pyspark/sql/group.html  |  16 +-
 .../python/_modules/pyspark/sql/readwriter.html |   4 +-
 .../python/_modules/pyspark/sql/session.html|   6 +-
 .../python/_modules/pyspark/sql/streaming.html  |   4 +-
 .../api/python/_modules/pyspark/sql/types.html  |  74 ++---
 .../api/python/_modules/pyspark/sql/window.html |   2 +-
 .../_modules/pyspark/streaming/context.html |  28 +-
 .../_modules/pyspark/streaming/dstream.html |  10 +-
 .../_modules/pyspark/streaming/kafka.html   |  24 +-
 site/docs/2.1.2/api/python/_static/pygments.css |   4 +
 site/docs/2.1.2/api/python/pyspark.html |  47 ++--
 site/docs/2.1.2/api/python/pyspark.ml.html  | 150 +--
 site/docs/2.1.2/api/python/pyspark.mllib.html   |  63 ++---
 site/docs/2.1.2/api/python/pyspark.sql.html | 267 +--
 .../2.1.2/api/python/pyspark.streaming.html |   3 +-
 site/docs/2.1.2/api/python/searchindex.js   |   2 +-
 54 files changed, 788 insertions(+), 800 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/accumulators.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/accumulators.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/accumulators.html
index b440fa0..5b1c432 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/accumulators.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/accumulators.html
@@ -310,7 +310,7 @@
 thread.start()
 return server
 
-if __name__ == __main__:
+if __name__ == __main__:
 import doctest
 (failure_count, test_count) 
= doctest.testmod()
 if failure_count:

http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/broadcast.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/broadcast.html 

[08/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/serializers.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/serializers.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/serializers.html
index 7d555b9..d432c3f 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/serializers.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/serializers.html
@@ -158,13 +158,13 @@
 # subclasses should override __eq__ as appropriate.
 
 def __eq__(self, other):
-return isinstance(other, self.__class__) and 
self.__dict__ == other.__dict__
+return isinstance(other, self.__class__) and 
self.__dict__ == other.__dict__
 
 def __ne__(self, other):
-return not self.__eq__(other)
+return not self.__eq__(other)
 
 def __repr__(self):
-return %s() % 
self.__class__.__name__
+return %s() % 
self.__class__.__name__
 
 def __hash__(self):
 return hash(str(self))
@@ -285,7 +285,7 @@
 have similar sizes.
 
 def __init__(self, serializer, batchSize=10):
-BatchedSerializer.__init__(self, serializer, batchSize)
+BatchedSerializer.__init__(self, serializer, batchSize)
 
 def _batched(self, iterator):
 n = self.batchSize
@@ -306,7 +306,7 @@
 
 
 def __init__(self, serializer, bestSize=1 
 16):
-BatchedSerializer.__init__(self, serializer, self.UNKNOWN_BATCH_SIZE)
+BatchedSerializer.__init__(self, serializer, self.UNKNOWN_BATCH_SIZE)
 self.bestSize = bestSize
 
 def dump_stream(self, iterator, stream):
@@ -408,23 +408,23 @@
 def _restore(name, fields, value):
  Restore an object of 
namedtuple
 k = (name, fields)
-cls = __cls.get(k)
-if cls is None:
-cls = collections.namedtuple(name, fields)
-__cls[k] = cls
-return cls(*value)
+cls = __cls.get(k)
+if cls is None:
+cls = collections.namedtuple(name, fields)
+__cls[k] = cls
+return cls(*value)
 
 
-def _hack_namedtuple(cls):
+def _hack_namedtuple(cls):
  Make class generated by namedtuple 
picklable 
-name = cls.__name__
-fields = cls._fields
+name = cls.__name__
+fields = cls._fields
 
 def __reduce__(self):
 return (_restore, (name, fields, tuple(self)))
-cls.__reduce__ = __reduce__
-cls._is_namedtuple_ = True
-return cls
+cls.__reduce__ = __reduce__
+cls._is_namedtuple_ = True
+return cls
 
 
 def _hijack_namedtuple():
@@ -437,8 +437,8 @@
 global _old_namedtuple_kwdefaults  # or it will put 
in closure too
 
 def _copy_func(f):
-return types.FunctionType(f.__code__, f.__globals__, 
f.__name__,
-  f.__defaults__, 
f.__closure__)
+return types.FunctionType(f.__code__, f.__globals__, f.__name__,
+  f.__defaults__, 
f.__closure__)
 
 def _kwdefaults(f):
 # __kwdefaults__ contains the default values of 
keyword-only arguments which are
@@ -461,23 +461,23 @@
 def namedtuple(*args, **kwargs):
 for k, v in _old_namedtuple_kwdefaults.items():
 kwargs[k] = kwargs.get(k, v)
-cls = _old_namedtuple(*args, **kwargs)
-return _hack_namedtuple(cls)
+cls = _old_namedtuple(*args, **kwargs)
+return _hack_namedtuple(cls)
 
 # replace namedtuple with new one
-collections.namedtuple.__globals__[_old_namedtuple_kwdefaults] = _old_namedtuple_kwdefaults
-collections.namedtuple.__globals__[_old_namedtuple] = _old_namedtuple
-collections.namedtuple.__globals__[_hack_namedtuple] = _hack_namedtuple
-collections.namedtuple.__code__ = namedtuple.__code__
+collections.namedtuple.__globals__[_old_namedtuple_kwdefaults] = _old_namedtuple_kwdefaults
+collections.namedtuple.__globals__[_old_namedtuple] = _old_namedtuple
+collections.namedtuple.__globals__[_hack_namedtuple] = _hack_namedtuple
+collections.namedtuple.__code__ = namedtuple.__code__
 collections.namedtuple.__hijack = 1
 
 # hack the cls already generated by namedtuple
 # those created in other module can be pickled as 
normal,
 # so only hack those in __main__ module
-for n, o in sys.modules[__main__].__dict__.items():
+for n, o in sys.modules[__main__].__dict__.items():
 if (type(o) is type 
and o.__base__ is 
tuple
 and hasattr(o, _fields)
-and __reduce__ not in o.__dict__):
+and __reduce__ not in o.__dict__):
 _hack_namedtuple(o)  # hack inplace
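The hijack above works by giving dynamically created namedtuple classes a __reduce__ that rebuilds the class from its name and fields at unpickling time. A self-contained sketch of that idea, independent of PySpark:

    import collections
    import pickle

    _cls_cache = {}

    def _restore(name, fields, values):
        # Recreate the namedtuple class on the unpickling side, then the instance.
        key = (name, fields)
        cls = _cls_cache.get(key)
        if cls is None:
            cls = collections.namedtuple(name, fields)
            _cls_cache[key] = cls
        return cls(*values)

    def _make_picklable(cls):
        name, fields = cls.__name__, cls._fields
        def __reduce__(self):
            return (_restore, (name, fields, tuple(self)))
        cls.__reduce__ = __reduce__
        return cls

    Point = _make_picklable(collections.namedtuple("Point", "x y"))
    print(pickle.loads(pickle.dumps(Point(1, 2))))   # Point(x=1, y=2)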
 
 
@@ -536,23 +536,23 @@
 
 
 def __init__(self):
-FramedSerializer.__init__(self)
+FramedSerializer.__init__(self)
 self._type = None
 
 def dumps(self, obj):
 if self._type is not None:
-return bP + pickle.dumps(obj, -1)
+return bP + pickle.dumps(obj, 

[06/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/streaming/dstream.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/streaming/dstream.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/streaming/dstream.html
index 48eb5cc..853b230 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/streaming/dstream.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/streaming/dstream.html
@@ -202,7 +202,7 @@
 
 Apply a function to each RDD in this DStream.
 
-if func.__code__.co_argcount == 1:
+if func.__code__.co_argcount == 1:
 old_func = func
 func = lambda t, rdd: old_func(rdd)
 jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer)
@@ -338,10 +338,10 @@
 `func` can have one argument of `rdd`, or have two 
arguments of
 (`time`, `rdd`)
 
-if func.__code__.co_argcount == 1:
+if func.__code__.co_argcount == 1:
 oldfunc = func
 func = lambda t, rdd: oldfunc(rdd)
-assert func.__code__.co_argcount == 2, func should 
take one or two arguments
+assert func.__code__.co_argcount == 2, func should 
take one or two arguments
 return TransformedDStream(self, func)
 
 [docs]
def transformWith(self, func, other, keepSerializer=False):
@@ -352,10 +352,10 @@
 `func` can have two arguments of (`rdd_a`, `rdd_b`) 
or have three
 arguments of (`time`, `rdd_a`, `rdd_b`)
 
-if func.__code__.co_argcount == 2:
+if func.__code__.co_argcount == 2:
 oldfunc = func
 func = lambda t, a, b: oldfunc(a, b)
-assert func.__code__.co_argcount == 3, func should 
take two or three arguments
+assert func.__code__.co_argcount == 3, func should 
take two or three arguments
 jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer, other._jrdd_deserializer)
 dstream = self._sc._jvm.PythonTransformed2DStream(self._jdstream.dstream(),
   other._jdstream.dstream(), jfunc)
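As the argument-count check above suggests, transform() accepts callbacks with or without the batch time; roughly (a sketch that assumes an existing SparkContext `sc`; the socket source is illustrative):

    from pyspark.streaming import StreamingContext

    ssc = StreamingContext(sc, 1)                      # 1-second batches
    lines = ssc.socketTextStream("localhost", 9999)    # source is illustrative

    # One-argument form: func(rdd)
    upper = lines.transform(lambda rdd: rdd.map(lambda s: s.upper()))

    # Two-argument form: func(time, rdd) -- the batch time is passed in as well
    tagged = lines.transform(lambda time, rdd: rdd.map(lambda s: (str(time), s)))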

http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/streaming/kafka.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/streaming/kafka.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/streaming/kafka.html
index 52f3960..c7e8fbf 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/streaming/kafka.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/streaming/kafka.html
@@ -288,7 +288,7 @@
 self.untilOffset = untilOffset
 
 def __eq__(self, other):
-if isinstance(other, self.__class__):
+if isinstance(other, self.__class__):
 return (self.topic 
== other.topic
 and self.partition 
== other.partition
 and self.fromOffset 
== other.fromOffset
@@ -297,7 +297,7 @@
 return False
 
 def __ne__(self, other):
-return not self.__eq__(other)
+return not self.__eq__(other)
 
 def __str__(self):
 return OffsetRange(topic: %s, partition: %d, 
range: [%d - %d] \
@@ -326,17 +326,17 @@
 return helper.createTopicAndPartition(self._topic, self._partition)
 
 def __eq__(self, other):
-if isinstance(other, self.__class__):
+if isinstance(other, self.__class__):
 return (self._topic 
== other._topic
 and self._partition 
== other._partition)
 else:
 return False
 
 def __ne__(self, other):
-return not self.__eq__(other)
+return not self.__eq__(other)
 
 def __hash__(self):
-return (self._topic, self._partition).__hash__()
+return (self._topic, self._partition).__hash__()
 
 
 [docs]class Broker(object):
@@ -363,7 +363,7 @@
 
 
 def __init__(self, jrdd, ctx, jrdd_deserializer):
-RDD.__init__(self, jrdd, ctx, jrdd_deserializer)
+RDD.__init__(self, jrdd, ctx, jrdd_deserializer)
 
 def offsetRanges(self):
 
@@ -383,13 +383,13 @@
 
 
 def __init__(self, jdstream, ssc, jrdd_deserializer):
-DStream.__init__(self, jdstream, ssc, jrdd_deserializer)
+DStream.__init__(self, jdstream, ssc, jrdd_deserializer)
 
 def foreachRDD(self, func):
 
 Apply a function to each RDD in this DStream.
 
-if func.__code__.co_argcount == 1:
+if func.__code__.co_argcount == 1:
 old_func = func
 func = lambda r, rdd: old_func(rdd)
 jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer) \
@@ -405,10 +405,10 @@
 `func` can have one argument of `rdd`, or have two 
arguments of
 (`time`, `rdd`)
 
-if 

[01/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 775127770 -> 6bbac4966


http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/searchindex.js
--
diff --git a/site/docs/2.1.2/api/python/searchindex.js 
b/site/docs/2.1.2/api/python/searchindex.js
index 6dd6c4c..3aaa727 100644
--- a/site/docs/2.1.2/api/python/searchindex.js
+++ b/site/docs/2.1.2/api/python/searchindex.js
@@ -1 +1 @@
-Search.setIndex({envversion:49,filenames:["index","pyspark","pyspark.ml","pyspark.mllib","pyspark.sql","pyspark.streaming"],objects:{"":{pyspark:[1,0,0,"-"]},"pyspark.Accumulator":{add:[1,2,1,""],value:[1,3,1,""]},"pyspark.AccumulatorParam":{addInPlace:[1,2,1,""],zero:[1,2,1,""]},"pyspark.BasicProfiler":{profile:[1,2,1,""],stats:[1,2,1,""]},"pyspark.Broadcast":{destroy:[1,2,1,""],dump:[1,2,1,""],load:[1,2,1,""],unpersist:[1,2,1,""],value:[1,3,1,""]},"pyspark.MarshalSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.PickleSerializer":{dumps:[1,2,1,""],loads:[1,2,1,""]},"pyspark.Profiler":{dump:[1,2,1,""],profile:[1,2,1,""],show:[1,2,1,""],stats:[1,2,1,""]},"pyspark.RDD":{aggregate:[1,2,1,""],aggregateByKey:[1,2,1,""],cache:[1,2,1,""],cartesian:[1,2,1,""],checkpoint:[1,2,1,""],coalesce:[1,2,1,""],cogroup:[1,2,1,""],collect:[1,2,1,""],collectAsMap:[1,2,1,""],combineByKey:[1,2,1,""],context:[1,3,1,""],count:[1,2,1,""],countApprox:[1,2,1,""],countApproxDistinct:[1,2,1,""],countByKe
 
y:[1,2,1,""],countByValue:[1,2,1,""],distinct:[1,2,1,""],filter:[1,2,1,""],first:[1,2,1,""],flatMap:[1,2,1,""],flatMapValues:[1,2,1,""],fold:[1,2,1,""],foldByKey:[1,2,1,""],foreach:[1,2,1,""],foreachPartition:[1,2,1,""],fullOuterJoin:[1,2,1,""],getCheckpointFile:[1,2,1,""],getNumPartitions:[1,2,1,""],getStorageLevel:[1,2,1,""],glom:[1,2,1,""],groupBy:[1,2,1,""],groupByKey:[1,2,1,""],groupWith:[1,2,1,""],histogram:[1,2,1,""],id:[1,2,1,""],intersection:[1,2,1,""],isCheckpointed:[1,2,1,""],isEmpty:[1,2,1,""],isLocallyCheckpointed:[1,2,1,""],join:[1,2,1,""],keyBy:[1,2,1,""],keys:[1,2,1,""],leftOuterJoin:[1,2,1,""],localCheckpoint:[1,2,1,""],lookup:[1,2,1,""],map:[1,2,1,""],mapPartitions:[1,2,1,""],mapPartitionsWithIndex:[1,2,1,""],mapPartitionsWithSplit:[1,2,1,""],mapValues:[1,2,1,""],max:[1,2,1,""],mean:[1,2,1,""],meanApprox:[1,2,1,""],min:[1,2,1,""],name:[1,2,1,""],partitionBy:[1,2,1,""],persist:[1,2,1,""],pipe:[1,2,1,""],randomSplit:[1,2,1,""],reduce:[1,2,1,""],reduceByKey:[1,2,1,""]
 
,reduceByKeyLocally:[1,2,1,""],repartition:[1,2,1,""],repartitionAndSortWithinPartitions:[1,2,1,""],rightOuterJoin:[1,2,1,""],sample:[1,2,1,""],sampleByKey:[1,2,1,""],sampleStdev:[1,2,1,""],sampleVariance:[1,2,1,""],saveAsHadoopDataset:[1,2,1,""],saveAsHadoopFile:[1,2,1,""],saveAsNewAPIHadoopDataset:[1,2,1,""],saveAsNewAPIHadoopFile:[1,2,1,""],saveAsPickleFile:[1,2,1,""],saveAsSequenceFile:[1,2,1,""],saveAsTextFile:[1,2,1,""],setName:[1,2,1,""],sortBy:[1,2,1,""],sortByKey:[1,2,1,""],stats:[1,2,1,""],stdev:[1,2,1,""],subtract:[1,2,1,""],subtractByKey:[1,2,1,""],sum:[1,2,1,""],sumApprox:[1,2,1,""],take:[1,2,1,""],takeOrdered:[1,2,1,""],takeSample:[1,2,1,""],toDebugString:[1,2,1,""],toLocalIterator:[1,2,1,""],top:[1,2,1,""],treeAggregate:[1,2,1,""],treeReduce:[1,2,1,""],union:[1,2,1,""],unpersist:[1,2,1,""],values:[1,2,1,""],variance:[1,2,1,""],zip:[1,2,1,""],zipWithIndex:[1,2,1,""],zipWithUniqueId:[1,2,1,""]},"pyspark.SparkConf":{contains:[1,2,1,""],get:[1,2,1,""],getAll:[1,2,1,""],se
 
t:[1,2,1,""],setAll:[1,2,1,""],setAppName:[1,2,1,""],setExecutorEnv:[1,2,1,""],setIfMissing:[1,2,1,""],setMaster:[1,2,1,""],setSparkHome:[1,2,1,""],toDebugString:[1,2,1,""]},"pyspark.SparkContext":{PACKAGE_EXTENSIONS:[1,3,1,""],accumulator:[1,2,1,""],addFile:[1,2,1,""],addPyFile:[1,2,1,""],applicationId:[1,3,1,""],binaryFiles:[1,2,1,""],binaryRecords:[1,2,1,""],broadcast:[1,2,1,""],cancelAllJobs:[1,2,1,""],cancelJobGroup:[1,2,1,""],defaultMinPartitions:[1,3,1,""],defaultParallelism:[1,3,1,""],dump_profiles:[1,2,1,""],emptyRDD:[1,2,1,""],getConf:[1,2,1,""],getLocalProperty:[1,2,1,""],getOrCreate:[1,4,1,""],hadoopFile:[1,2,1,""],hadoopRDD:[1,2,1,""],newAPIHadoopFile:[1,2,1,""],newAPIHadoopRDD:[1,2,1,""],parallelize:[1,2,1,""],pickleFile:[1,2,1,""],range:[1,2,1,""],runJob:[1,2,1,""],sequenceFile:[1,2,1,""],setCheckpointDir:[1,2,1,""],setJobGroup:[1,2,1,""],setLocalProperty:[1,2,1,""],setLogLevel:[1,2,1,""],setSystemProperty:[1,4,1,""],show_profiles:[1,2,1,""],sparkUser:[1,2,1,""],start
 

[02/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/pyspark.streaming.html
--
diff --git a/site/docs/2.1.2/api/python/pyspark.streaming.html 
b/site/docs/2.1.2/api/python/pyspark.streaming.html
index 8a2002d..5c1817d 100644
--- a/site/docs/2.1.2/api/python/pyspark.streaming.html
+++ b/site/docs/2.1.2/api/python/pyspark.streaming.html
@@ -766,7 +766,8 @@ DStreams batching interval
 
 
 class Java[source]¶
-
+Bases: object
+
 
 implements = 
['org.apache.spark.streaming.api.java.PythonStreamingListener']¶
 





[04/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/pyspark.mllib.html
--
diff --git a/site/docs/2.1.2/api/python/pyspark.mllib.html 
b/site/docs/2.1.2/api/python/pyspark.mllib.html
index 354fa24..53418fa 100644
--- a/site/docs/2.1.2/api/python/pyspark.mllib.html
+++ b/site/docs/2.1.2/api/python/pyspark.mllib.html
@@ -936,7 +936,7 @@ of points (if  1.0) of a divisible cluster.
  data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
  model = KMeans.train(
 ... sc.parallelize(data), 2, maxIterations=10, initializationMode=random,
-...seed=50, initializationSteps=5, epsilon=1e-4)
+...seed=50, initializationSteps=5, epsilon=1e-4)
  model.predict(array([0.0, 0.0])) == model.predict(array([1.0, 1.0]))
 True
  model.predict(array([8.0, 9.0])) == model.predict(array([9.0, 8.0]))
@@ -953,7 +953,7 @@ of points (if  1.0) of a divisible cluster.
 ... SparseVector(3, {2: 1.1})
 ... ]
  model = KMeans.train(sc.parallelize(sparse_data), 
2, initializationMode=k-means||,
-... seed=50, initializationSteps=5, epsilon=1e-4)
+... seed=50, initializationSteps=5, epsilon=1e-4)
  model.predict(array([0., 1., 0.])) == model.predict(array([0, 1.1, 0.]))
 True
  model.predict(array([0., 0., 1.])) == model.predict(array([0, 0, 1.1]))
@@ -1579,25 +1579,18 @@ a gaussian population with constant weights.
 n_t+1 = n_t * a + m_t
 
 where
-
-c_t: Centroid at the n_th iteration.
-
+
+c_t: Centroid at the n_th iteration.
 
 n_t: Number of samples (or) weights associated with the centroid
-at the n_th iteration.
-
+at the n_th iteration.
 
 
-x_t: Centroid of the new data closest to c_t.
-
-m_t: Number of samples (or) weights of the new data 
closest to c_t
-
-c_t+1: New centroid.
-
-n_t+1: New number of weights.
-
-a: Decay Factor, which gives the forgetfulness.
-
+x_t: Centroid of the new data closest to c_t.
+m_t: Number of samples (or) weights of the new data closest to c_t
+c_t+1: New centroid.
+n_t+1: New number of weights.
+a: Decay Factor, which gives the forgetfulness.
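For reference, the centroid update these terms plug into is, as the StreamingKMeansModel documentation states it, c_t+1 = ((c_t * n_t * a) + (x_t * m_t)) / (n_t * a + m_t), paired with the weight update n_t+1 = n_t * a + m_t shown above. A tiny numeric sketch of one update step (illustrative values only):

    import numpy as np

    def discounted_update(c_t, n_t, x_t, m_t, a):
        """One streaming k-means step: blend old centroid and new batch mean."""
        n_next = n_t * a + m_t
        c_next = (c_t * n_t * a + x_t * m_t) / n_next
        return c_next, n_next

    c, n = discounted_update(np.array([0.0, 0.0]), 4.0, np.array([1.0, 1.0]), 2.0, a=0.5)
    print(c, n)   # centroid pulled toward the new data; old weight decayed, then increased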
 
 
 Note
@@ -1622,7 +1615,7 @@ forgotten.
  stkm = StreamingKMeansModel(initCenters, 
initWeights)
  data = sc.parallelize([[-0.1, -0.1], [0.1, 0.1],
 ...[0.9, 0.9], [1.1, 1.1]])
- stkm = stkm.update(data, 1.0, ubatches)
+ stkm = stkm.update(data, 1.0, ubatches)
  stkm.centers
 array([[ 0.,  0.],
[ 1.,  1.]])
@@ -1634,7 +1627,7 @@ forgotten.
 [3.0, 3.0]
  decayFactor = 0.0
  data = sc.parallelize([DenseVector([1.5, 1.5]), DenseVector([0.2, 0.2])])
- stkm = stkm.update(data, 0.0, ubatches)
+ stkm = stkm.update(data, 0.0, ubatches)
  stkm.centers
 array([[ 0.2,  0.2],
[ 1.5,  1.5]])
@@ -2643,7 +2636,7 @@ Compositionality.
 Querying for synonyms of a word will not return that word:
  syms = model.findSynonyms(a, 2)
  [s[0] for s in syms]
-[u'b', u'c']
+['b', 'c']
 
 
 But querying for synonyms of a vector may return the word whose
@@ -2651,7 +2644,7 @@ representation is that vector:
  vec = model.transform(a)
  syms = model.findSynonyms(vec, 2)
  [s[0] for s in syms]
-[u'a', u'b']
+['a', 'b']
 
 
  import os, tempfile
@@ -2662,7 +2655,7 @@ representation is that vector:
 True
  syms = sameModel.findSynonyms(a, 
2)
  [s[0] for s in syms]
-[u'b', u'c']
+['b', 'c']
  from shutil import rmtree
  try:
 ... rmtree(path)
@@ -3053,7 +3046,7 @@ using the Parallel FP-Growth algorithm.
  rdd = sc.parallelize(data, 2)
  model = FPGrowth.train(rdd, 0.6, 2)
  sorted(model.freqItemsets().collect())
-[FreqItemset(items=[u'a'], freq=4), FreqItemset(items=[u'c'], freq=3), ...
+[FreqItemset(items=['a'], freq=4), FreqItemset(items=['c'], freq=3), ...
  model_path = temp_path + 
/fpm
  model.save(sc, model_path)
  sameModel = FPGrowthModel.load(sc, model_path)
@@ -3151,7 +3144,7 @@ another iteration of distributed prefix growth is run.
  rdd = sc.parallelize(data, 2)
  model = PrefixSpan.train(rdd)
  sorted(model.freqSequences().collect())
-[FreqSequence(sequence=[[u'a']], freq=3), FreqSequence(sequence=[[u'a'], [u'a']], freq=1), ...
+[FreqSequence(sequence=[['a']], freq=3), FreqSequence(sequence=[['a'], ['a']], freq=1), ...
 
 
 
@@ -4903,7 +4896,7 @@ distribution with the input mean.
 
 
 
-static exponentialVectorRDD(sc, *a, **kw)[source]¶
+static exponentialVectorRDD(sc, mean, numRows, 
numCols, numPartitions=None, seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the Exponential distribution with the input mean.
 
@@ -4989,7 +4982,7 @@ distribution with the input shape and scale.
 
 
 
-static gammaVectorRDD(sc, *a, **kw)[source]¶
+static gammaVectorRDD(sc, shape, scale, 
numRows, numCols, numPartitions=None, 
seed=None)[source]¶
 Generates an RDD comprised of vectors containing i.i.d. samples drawn
 from the Gamma distribution.
 
@@ -5079,7 +5072,7 @@ distribution with the input mean and 

[13/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/ml/feature.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/ml/feature.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/ml/feature.html
index 776d399..3a2c2a1 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/ml/feature.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/ml/feature.html
@@ -137,7 +137,7 @@
 
 __init__(self, threshold=0.0, inputCol=None, 
outputCol=None)
 
-super(Binarizer, self).__init__()
+super(Binarizer, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.Binarizer, self.uid)
 self._setDefault(threshold=0.0)
 kwargs = self._input_kwargs
@@ -223,7 +223,7 @@
 
 __init__(self, splits=None, inputCol=None, 
outputCol=None, handleInvalid=error)
 
-super(Bucketizer, self).__init__()
+super(Bucketizer, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.Bucketizer, self.uid)
 self._setDefault(handleInvalid=error)
 kwargs = self._input_kwargs
@@ -336,7 +336,7 @@
 __init__(self, minTF=1.0, minDF=1.0, vocabSize=1 
 18, binary=False, inputCol=None,\
  outputCol=None)
 
-super(CountVectorizer, self).__init__()
+super(CountVectorizer, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.CountVectorizer,
 self.uid)
 self._setDefault(minTF=1.0, minDF=1.0, vocabSize=1 
 18, binary=False)
@@ -469,7 +469,7 @@
 
 __init__(self, inverse=False, inputCol=None, 
outputCol=None)
 
-super(DCT, self).__init__()
+super(DCT, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.DCT, self.uid)
 self._setDefault(inverse=False)
 kwargs = self._input_kwargs
@@ -533,7 +533,7 @@
 
 __init__(self, scalingVec=None, inputCol=None, 
outputCol=None)
 
-super(ElementwiseProduct, self).__init__()
+super(ElementwiseProduct, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.ElementwiseProduct,
 self.uid)
 kwargs = self._input_kwargs
@@ -603,7 +603,7 @@
 
 __init__(self, numFeatures=1  18, 
binary=False, inputCol=None, outputCol=None)
 
-super(HashingTF, self).__init__()
+super(HashingTF, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.HashingTF, self.uid)
 self._setDefault(numFeatures=1 
 18, binary=False)
 kwargs = self._input_kwargs
@@ -676,7 +676,7 @@
 
 __init__(self, minDocFreq=0, inputCol=None, 
outputCol=None)
 
-super(IDF, self).__init__()
+super(IDF, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.IDF, self.uid)
 self._setDefault(minDocFreq=0)
 kwargs = self._input_kwargs
@@ -766,7 +766,7 @@
 
 __init__(self, inputCol=None, outputCol=None)
 
-super(MaxAbsScaler, self).__init__()
+super(MaxAbsScaler, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.MaxAbsScaler, self.uid)
 self._setDefault()
 kwargs = self._input_kwargs
@@ -860,7 +860,7 @@
 
 __init__(self, min=0.0, max=1.0, inputCol=None, 
outputCol=None)
 
-super(MinMaxScaler, self).__init__()
+super(MinMaxScaler, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.MinMaxScaler, self.uid)
 self._setDefault(min=0.0, max=1.0)
 kwargs = self._input_kwargs
@@ -978,7 +978,7 @@
 
 __init__(self, n=2, inputCol=None, 
outputCol=None)
 
-super(NGram, self).__init__()
+super(NGram, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.NGram, self.uid)
 self._setDefault(n=2)
 kwargs = self._input_kwargs
@@ -1042,7 +1042,7 @@
 
 __init__(self, p=2.0, inputCol=None, 
outputCol=None)
 
-super(Normalizer, self).__init__()
+super(Normalizer, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.Normalizer, self.uid)
 self._setDefault(p=2.0)
 kwargs = self._input_kwargs
@@ -1122,7 +1122,7 @@
 
 __init__(self, includeFirst=True, inputCol=None, 
outputCol=None)
 
-super(OneHotEncoder, self).__init__()
+super(OneHotEncoder, self).__init__()
 self._java_obj = self._new_java_obj(org.apache.spark.ml.feature.OneHotEncoder, self.uid)
 

[09/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/mllib/regression.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/regression.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/regression.html
index 3c5041f..7aca653 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/regression.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/regression.html
@@ -238,7 +238,7 @@
 
 @classmethod
 @since(1.4.0)
-[docs]
def load(cls, sc, path):
+[docs]
def load(cls, sc, path):
 Load a 
LinearRegressionModel.
 java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel.load(
 sc._jsc.sc(), path)
@@ -274,7 +274,7 @@
 
 @classmethod
 @since(0.9.0)
-[docs]
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
+[docs]
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
   initialWeights=None, regParam=0.0, regType=None, intercept=False,
   validateData=True, convergenceTol=0.001):
 
@@ -405,7 +405,7 @@
 
 @classmethod
 @since(1.4.0)
-[docs]
def load(cls, sc, path):
+[docs]
def load(cls, sc, path):
 Load a 
LassoModel.
 java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel.load(
 sc._jsc.sc(), path)
@@ -423,7 +423,7 @@
 
 @classmethod
 @since(0.9.0)
-[docs]
def train(cls, data, iterations=100, step=1.0, regParam=0.01,
+[docs]
def train(cls, data, iterations=100, step=1.0, regParam=0.01,
   miniBatchFraction=1.0, initialWeights=None, intercept=False,
   validateData=True, convergenceTol=0.001):
 
@@ -548,7 +548,7 @@
 
 @classmethod
 @since(1.4.0)
-[docs]
def load(cls, sc, path):
+[docs]
def load(cls, sc, path):
 Load a 
RidgeRegressionMode.
 java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel.load(
 sc._jsc.sc(), path)
@@ -567,7 +567,7 @@
 
 @classmethod
 @since(0.9.0)
-[docs]
def train(cls, data, iterations=100, step=1.0, regParam=0.01,
+[docs]
def train(cls, data, iterations=100, step=1.0, regParam=0.01,
   miniBatchFraction=1.0, initialWeights=None, intercept=False,
   validateData=True, convergenceTol=0.001):
 
@@ -705,7 +705,7 @@
 
 @classmethod
 @since(1.4.0)
-[docs]
def load(cls, sc, path):
+[docs]
def load(cls, sc, path):
 Load an 
IsotonicRegressionModel.
 java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load(
 sc._jsc.sc(), path)
@@ -740,7 +740,7 @@
 
 @classmethod
 @since(1.4.0)
-[docs]
def train(cls, data, isotonic=True):
+[docs]
def train(cls, data, isotonic=True):
 
 Train an isotonic regression model on the given 
data.
 
@@ -840,7 +840,7 @@
 self.miniBatchFraction = miniBatchFraction
 self.convergenceTol = convergenceTol
 self._model = None
-super(StreamingLinearRegressionWithSGD, self).__init__(
+super(StreamingLinearRegressionWithSGD, self).__init__(
 model=self._model)
 
 @since(1.5.0)
@@ -874,7 +874,7 @@
 import doctest
 from pyspark.sql import SparkSession
 import pyspark.mllib.regression
-globs = pyspark.mllib.regression.__dict__.copy()
+globs = pyspark.mllib.regression.__dict__.copy()
 spark = SparkSession.builder\
 .master(local[2])\
 .appName(mllib.regression 
tests)\
@@ -885,7 +885,7 @@
 if failure_count:
 exit(-1)
 
-if __name__ == __main__:
+if __name__ == __main__:
 _test()
 
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/mllib/tree.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/tree.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/tree.html
index e060769..ed86923 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/tree.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/tree.html
@@ -170,7 +170,7 @@
 return self._java_model.toDebugString()
 
 @classmethod
-def _java_loader_class(cls):
+def _java_loader_class(cls):
 return org.apache.spark.mllib.tree.model.DecisionTreeModel
 
 
@@ -183,7 +183,7 @@
 
 
 @classmethod
-def _train(cls, data, type, numClasses, 
features, impurity=gini, maxDepth=5, maxBins=32,
+def _train(cls, data, type, numClasses, 
features, impurity=gini, maxDepth=5, maxBins=32,
minInstancesPerNode=1, minInfoGain=0.0):
 first = data.first()
 assert isinstance(first, LabeledPoint), the data should be RDD of LabeledPoint
@@ -193,7 +193,7 @@
 
 @classmethod
 

[10/14] spark-website git commit: Fix signature description broken in PySpark API documentation in 2.1.2

2018-07-03 Thread gurwls223
http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/mllib/clustering.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/clustering.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/clustering.html
index 8aab284..eef793a 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/clustering.html
+++ b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/clustering.html
@@ -112,7 +112,7 @@
 
 
 def __init__(self, java_model):
-super(BisectingKMeansModel, self).__init__(java_model)
+super(BisectingKMeansModel, self).__init__(java_model)
 self.centers = [c.toArray() for c in self.call(clusterCenters)]
 
 @property
@@ -338,7 +338,7 @@
 
 @classmethod
 @since(1.4.0)
-[docs]
def load(cls, sc, path):
+[docs]
def load(cls, sc, path):
 
 Load a model from the given path.
 
@@ -353,8 +353,8 @@
 
 @classmethod
 @since(0.9.0)
-[docs]
def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode=k-means||,
-  seed=None, initializationSteps=2, epsilon=1e-4, initialModel=None):
+[docs]
def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode=k-means||,
+  seed=None, initializationSteps=2, epsilon=1e-4, initialModel=None):
 
 Train a k-means clustering model.
 
@@ -540,7 +540,7 @@
 
 @classmethod
 @since(1.5.0)
-[docs]
def load(cls, sc, path):
+[docs]
def load(cls, sc, path):
 Load the GaussianMixtureModel from 
disk.
 
 :param sc:
@@ -548,9 +548,9 @@
 :param path:
   Path to where the model is stored.
 
-model = cls._load_java(sc, path)
+model = cls._load_java(sc, path)
 wrapper = sc._jvm.org.apache.spark.mllib.api.python.GaussianMixtureModelWrapper(model)
-return cls(wrapper)
+return cls(wrapper)
 
 
 [docs]class GaussianMixture(object):
@@ -561,7 +561,7 @@
 
 @classmethod
 @since(1.3.0)
-[docs]def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None):
+[docs]def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None):
 
 Train a Gaussian Mixture clustering model.
 
@@ -671,11 +671,11 @@
 
 @classmethod
 @since(1.5.0)
-[docs]def load(cls, sc, path):
+[docs]def load(cls, sc, path):
 
 Load a model from the given path.
 
-model = cls._load_java(sc, path)
+model = cls._load_java(sc, path)
 wrapper =\
 sc._jvm.org.apache.spark.mllib.api.python.PowerIterationClusteringModelWrapper(model)
 return PowerIterationClusteringModel(wrapper)
@@ -694,7 +694,7 @@
 
 @classmethod
 @since(1.5.0)
-[docs]def train(cls, rdd, k, maxIterations=100, initMode="random"):
+[docs]def train(cls, rdd, k, maxIterations=100, initMode="random"):
 
 :param rdd:
   An RDD of (i, j, s\ :sub:`ij`\) tuples representing the
@@ -787,7 +787,7 @@
 .. versionadded:: 1.5.0
 
 def __init__(self, clusterCenters, clusterWeights):
-super(StreamingKMeansModel, self).__init__(centers=clusterCenters)
+super(StreamingKMeansModel, self).__init__(centers=clusterCenters)
 self._clusterWeights = list(clusterWeights)
 
 @property
@@ -1022,7 +1022,7 @@
 
 @classmethod
 @since(1.5.0)
-[docs]def load(cls, sc, path):
+[docs]def load(cls, sc, path):
 Load the LDAModel from disk.
 
 :param sc:
@@ -1045,7 +1045,7 @@
 
 @classmethod
 @since(1.5.0)
-[docs]def train(cls, rdd, k=10, maxIterations=20, docConcentration=-1.0,
+[docs]def train(cls, rdd, k=10, maxIterations=20, docConcentration=-1.0,
   topicConcentration=-1.0, seed=None, checkpointInterval=10, optimizer="em"):
 Train a LDA model.
 
@@ -1091,7 +1091,7 @@
 def _test():
 import doctest
 import pyspark.mllib.clustering
-globs = pyspark.mllib.clustering.__dict__.copy()
+globs = pyspark.mllib.clustering.__dict__.copy()
 globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
 (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
 globs['sc'].stop()
@@ -1099,7 +1099,7 @@
 exit(-1)
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":
 _test()
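
As context for the signature fixes above, the pyspark.mllib.clustering classmethods whose rendered signatures this patch repairs are used roughly as in the sketch below. The sketch is purely illustrative and not part of the commit; the sample points and the save path are made up.

# Illustrative use of the clustering API whose documented signatures are
# restored by this patch; data and the save path are hypothetical.
from pyspark import SparkContext
from pyspark.mllib.clustering import KMeans, KMeansModel

sc = SparkContext("local[2]", "clustering-sketch")
points = sc.parallelize([[0.0, 0.0], [1.0, 1.0], [9.0, 8.0], [8.0, 9.0]])

# train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||", ...)
model = KMeans.train(points, k=2, maxIterations=10,
                     initializationMode="k-means||", epsilon=1e-4)

model.save(sc, "/tmp/kmeans-model")                  # hypothetical path
reloaded = KMeansModel.load(sc, "/tmp/kmeans-model")  # load(cls, sc, path)
print(reloaded.clusterCenters)
sc.stop()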
 
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/6bbac496/site/docs/2.1.2/api/python/_modules/pyspark/mllib/common.html
--
diff --git a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/common.html 
b/site/docs/2.1.2/api/python/_modules/pyspark/mllib/common.html
index 4e761b0..2f015d3 100644
--- a/site/docs/2.1.2/api/python/_modules/pyspark/mllib/common.html
+++ 

svn commit: r27902 - /dev/spark/v2.2.2-rc2-bin/ /release/spark/spark-2.2.2/

2018-07-03 Thread tgraves
Author: tgraves
Date: Tue Jul  3 18:08:54 2018
New Revision: 27902

Log:
Publish spark 2.2.2

Added:
release/spark/spark-2.2.2/
  - copied from r27901, dev/spark/v2.2.2-rc2-bin/
Removed:
dev/spark/v2.2.2-rc2-bin/





spark git commit: [SPARK-24420][BUILD] Upgrade ASM to 6.1 to support JDK9+

2018-07-03 Thread dbtsai
Repository: spark
Updated Branches:
  refs/heads/master a7c8f0c8c -> 5585c5765


[SPARK-24420][BUILD] Upgrade ASM to 6.1 to support JDK9+

## What changes were proposed in this pull request?

Upgrade ASM to 6.1 to support JDK9+

## How was this patch tested?

Existing tests.

Author: DB Tsai 

Closes #21459 from dbtsai/asm.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5585c576
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5585c576
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5585c576

Branch: refs/heads/master
Commit: 5585c5765f13519a447587ca778d52ce6a36a484
Parents: a7c8f0c
Author: DB Tsai 
Authored: Tue Jul 3 10:13:48 2018 -0700
Committer: DB Tsai 
Committed: Tue Jul 3 10:13:48 2018 -0700

--
 core/pom.xml | 2 +-
 .../main/scala/org/apache/spark/util/ClosureCleaner.scala| 4 ++--
 dev/deps/spark-deps-hadoop-2.6   | 2 +-
 dev/deps/spark-deps-hadoop-2.7   | 2 +-
 dev/deps/spark-deps-hadoop-3.1   | 2 +-
 graphx/pom.xml   | 2 +-
 .../scala/org/apache/spark/graphx/util/BytecodeUtils.scala   | 4 ++--
 pom.xml  | 8 
 repl/pom.xml | 4 ++--
 .../scala/org/apache/spark/repl/ExecutorClassLoader.scala| 4 ++--
 sql/core/pom.xml | 2 +-
 11 files changed, 18 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5585c576/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 220522d..d0b869e 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -56,7 +56,7 @@
 
 
   <groupId>org.apache.xbean</groupId>
-  <artifactId>xbean-asm5-shaded</artifactId>
+  <artifactId>xbean-asm6-shaded</artifactId>
 </dependency>
 <dependency>
   <groupId>org.apache.hadoop</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/5585c576/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala 
b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
index ad0c063..073d71c 100644
--- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
+++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
@@ -22,8 +22,8 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
 import scala.collection.mutable.{Map, Set, Stack}
 import scala.language.existentials
 
-import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor, Type}
-import org.apache.xbean.asm5.Opcodes._
+import org.apache.xbean.asm6.{ClassReader, ClassVisitor, MethodVisitor, Type}
+import org.apache.xbean.asm6.Opcodes._
 
 import org.apache.spark.{SparkEnv, SparkException}
 import org.apache.spark.internal.Logging
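
For background on what the relocated package provides, a minimal sketch of reading class metadata through the shaded ASM 6 API follows. This example is illustrative only and is not part of the patch; the object name and helper are made up.

// Hedged sketch: enumerating a class's method names via the shaded
// org.apache.xbean.asm6 package that this upgrade switches to.
import org.apache.xbean.asm6.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm6.Opcodes

object Asm6Sketch {
  def methodNames(classBytes: Array[Byte]): Seq[String] = {
    val names = scala.collection.mutable.ArrayBuffer.empty[String]
    val reader = new ClassReader(classBytes)
    reader.accept(new ClassVisitor(Opcodes.ASM6) {
      override def visitMethod(
          access: Int,
          name: String,
          desc: String,
          signature: String,
          exceptions: Array[String]): MethodVisitor = {
        names += name
        null // method bodies are not needed here
      }
    }, ClassReader.SKIP_CODE)
    names.toSeq
  }
}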

http://git-wip-us.apache.org/repos/asf/spark/blob/5585c576/dev/deps/spark-deps-hadoop-2.6
--
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 96e9c27..f50a0aa 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -192,7 +192,7 @@ stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-2.6.3.jar
 validation-api-1.1.0.Final.jar
-xbean-asm5-shaded-4.4.jar
+xbean-asm6-shaded-4.8.jar
 xercesImpl-2.9.1.jar
 xmlenc-0.52.jar
 xz-1.0.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/5585c576/dev/deps/spark-deps-hadoop-2.7
--
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 4a6ee02..774f9dc 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -193,7 +193,7 @@ stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
 univocity-parsers-2.6.3.jar
 validation-api-1.1.0.Final.jar
-xbean-asm5-shaded-4.4.jar
+xbean-asm6-shaded-4.8.jar
 xercesImpl-2.9.1.jar
 xmlenc-0.52.jar
 xz-1.0.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/5585c576/dev/deps/spark-deps-hadoop-3.1
--
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index e0b560c..19c05ad 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -214,7 +214,7 @@ token-provider-1.0.1.jar
 univocity-parsers-2.6.3.jar
 validation-api-1.1.0.Final.jar
 woodstox-core-5.0.3.jar
-xbean-asm5-shaded-4.4.jar
+xbean-asm6-shaded-4.8.jar
 xz-1.0.jar
 zjsonpatch-0.3.0.jar
 zookeeper-3.4.9.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/5585c576/graphx/pom.xml

svn commit: r27892 - in /dev/spark/2.4.0-SNAPSHOT-2018_07_03_00_02-a7c8f0c-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s

2018-07-03 Thread pwendell
Author: pwendell
Date: Tue Jul  3 07:17:56 2018
New Revision: 27892

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_07_03_00_02-a7c8f0c docs


[This commit notification would consist of 1467 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]
