Repository: spark
Updated Branches:
refs/heads/branch-2.0 091cd5f26 -> 03f336d89
[MINOR][PYSPARK][DOC] Fix wrongly formatted examples in PySpark documentation
## What changes were proposed in this pull request?
This PR fixes wrongly formatted examples in PySpark documentation as below:
- **`SparkSession`**
- **Before**

- **After**

- **`Builder`**
- **Before**

- **After**

This PR also fixes several similar instances across the documentation in the
`sql` PySpark module.
## How was this patch tested?
N/A
Author: hyukjinkwon <[email protected]>
Closes #14063 from HyukjinKwon/minor-pyspark-builder.
(cherry picked from commit 4e14199ff740ea186eb2cec2e5cf901b58c5f90e)
Signed-off-by: Reynold Xin <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/03f336d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/03f336d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/03f336d8
Branch: refs/heads/branch-2.0
Commit: 03f336d8921e1f22ee4d1f6fa8869163b1f29ea9
Parents: 091cd5f
Author: hyukjinkwon <[email protected]>
Authored: Wed Jul 6 10:45:51 2016 -0700
Committer: Reynold Xin <[email protected]>
Committed: Wed Jul 6 10:45:56 2016 -0700
----------------------------------------------------------------------
python/pyspark/mllib/clustering.py | 14 +++++++-------
python/pyspark/sql/dataframe.py | 8 ++++----
python/pyspark/sql/functions.py | 8 ++++----
python/pyspark/sql/group.py | 2 ++
python/pyspark/sql/session.py | 13 +++++++------
python/pyspark/sql/types.py | 4 ++--
6 files changed, 26 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/03f336d8/python/pyspark/mllib/clustering.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/clustering.py
b/python/pyspark/mllib/clustering.py
index 93a0b64..c38c543 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -571,14 +571,14 @@ class PowerIterationClusteringModel(JavaModelWrapper,
JavaSaveable, JavaLoader):
>>> import math
>>> def genCircle(r, n):
- ... points = []
- ... for i in range(0, n):
- ... theta = 2.0 * math.pi * i / n
- ... points.append((r * math.cos(theta), r * math.sin(theta)))
- ... return points
+ ... points = []
+ ... for i in range(0, n):
+ ... theta = 2.0 * math.pi * i / n
+ ... points.append((r * math.cos(theta), r * math.sin(theta)))
+ ... return points
>>> def sim(x, y):
- ... dist2 = (x[0] - y[0]) * (x[0] - y[0]) + (x[1] - y[1]) * (x[1] - y[1])
- ... return math.exp(-dist2 / 2.0)
+ ... dist2 = (x[0] - y[0]) * (x[0] - y[0]) + (x[1] - y[1]) * (x[1] -
y[1])
+ ... return math.exp(-dist2 / 2.0)
>>> r1 = 1.0
>>> n1 = 10
>>> r2 = 4.0
http://git-wip-us.apache.org/repos/asf/spark/blob/03f336d8/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index e6e7029..c7d704a 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1033,10 +1033,10 @@ class DataFrame(object):
:func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
>>> from pyspark.sql import Row
- >>> df = sc.parallelize([ \
- Row(name='Alice', age=5, height=80), \
- Row(name='Alice', age=5, height=80), \
- Row(name='Alice', age=10, height=80)]).toDF()
+ >>> df = sc.parallelize([ \\
+ ... Row(name='Alice', age=5, height=80), \\
+ ... Row(name='Alice', age=5, height=80), \\
+ ... Row(name='Alice', age=10, height=80)]).toDF()
>>> df.dropDuplicates().show()
+---+------+-----+
|age|height| name|
http://git-wip-us.apache.org/repos/asf/spark/blob/03f336d8/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 15cefc8..1feca6e 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1550,8 +1550,8 @@ def translate(srcCol, matching, replace):
The translate will happen when any character in the string matching with
the character
in the `matching`.
- >>> spark.createDataFrame([('translate',)], ['a']).select(translate('a',
"rnlt", "123")\
- .alias('r')).collect()
+ >>> spark.createDataFrame([('translate',)], ['a']).select(translate('a',
"rnlt", "123") \\
+ ... .alias('r')).collect()
[Row(r=u'1a2s3ae')]
"""
sc = SparkContext._active_spark_context
@@ -1649,8 +1649,8 @@ def get_json_object(col, path):
>>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1":
"value12"}''')]
>>> df = spark.createDataFrame(data, ("key", "jstring"))
- >>> df.select(df.key, get_json_object(df.jstring, '$.f1').alias("c0"), \
- get_json_object(df.jstring, '$.f2').alias("c1")
).collect()
+ >>> df.select(df.key, get_json_object(df.jstring, '$.f1').alias("c0"), \\
+ ... get_json_object(df.jstring, '$.f2').alias("c1")
).collect()
[Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12',
c1=None)]
"""
sc = SparkContext._active_spark_context
http://git-wip-us.apache.org/repos/asf/spark/blob/03f336d8/python/pyspark/sql/group.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index a423206..f2092f9 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -179,10 +179,12 @@ class GroupedData(object):
:param values: List of values that will be translated to columns in
the output DataFrame.
# Compute the sum of earnings for each year by course with each course
as a separate column
+
>>> df4.groupBy("year").pivot("course", ["dotNET",
"Java"]).sum("earnings").collect()
[Row(year=2012, dotNET=15000, Java=20000), Row(year=2013,
dotNET=48000, Java=30000)]
# Or without specifying column values (less efficient)
+
>>> df4.groupBy("year").pivot("course").sum("earnings").collect()
[Row(year=2012, Java=20000, dotNET=15000), Row(year=2013, Java=30000,
dotNET=48000)]
"""
http://git-wip-us.apache.org/repos/asf/spark/blob/03f336d8/python/pyspark/sql/session.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 55f86a1..a360fbe 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -66,12 +66,11 @@ class SparkSession(object):
tables, execute SQL over tables, cache tables, and read parquet files.
To create a SparkSession, use the following builder pattern:
- >>> spark = SparkSession.builder \
- .master("local") \
- .appName("Word Count") \
- .config("spark.some.config.option", "some-value") \
- .getOrCreate()
-
+ >>> spark = SparkSession.builder \\
+ ... .master("local") \\
+ ... .appName("Word Count") \\
+ ... .config("spark.some.config.option", "some-value") \\
+ ... .getOrCreate()
"""
class Builder(object):
@@ -87,11 +86,13 @@ class SparkSession(object):
both :class:`SparkConf` and :class:`SparkSession`'s own
configuration.
For an existing SparkConf, use `conf` parameter.
+
>>> from pyspark.conf import SparkConf
>>> SparkSession.builder.config(conf=SparkConf())
<pyspark.sql.session...
For a (key, value) pair, you can omit parameter names.
+
>>> SparkSession.builder.config("spark.some.config.option",
"some-value")
<pyspark.sql.session...
http://git-wip-us.apache.org/repos/asf/spark/blob/03f336d8/python/pyspark/sql/types.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index a367987..eea8068 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -486,8 +486,8 @@ class StructType(DataType):
DataType object.
>>> struct1 = StructType().add("f1", StringType(), True).add("f2",
StringType(), True, None)
- >>> struct2 = StructType([StructField("f1", StringType(), True),\
- StructField("f2", StringType(), True, None)])
+ >>> struct2 = StructType([StructField("f1", StringType(), True), \\
+ ... StructField("f2", StringType(), True, None)])
>>> struct1 == struct2
True
>>> struct1 = StructType().add(StructField("f1", StringType(), True))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]