Repository: spark
Updated Branches:
  refs/heads/master a7b46c627 -> f5f02d213


[SPARK-20456][DOCS] Add examples for functions collection for pyspark

## What changes were proposed in this pull request?

This adds documentation and doctest examples to many functions in `pyspark.sql.functions`:
`upper`, `lower`, `reverse`, `unix_timestamp`, `from_unixtime`, `rand`,
`randn`, `collect_list`, `collect_set`, `lit`.
It also adds units to the trigonometric functions, renames the columns in the
datetime examples to be more informative, and adds links between related functions.
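For illustration, the new doctest-style examples behave as follows when run interactively. This is a minimal sketch, not part of the patch; it assumes a local `SparkSession`, standing in for the `spark` global that the doctests get from `_test()`:

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import collect_list, collect_set, unix_timestamp

# Assumption: a local SparkSession, standing in for the `spark` doctest global.
spark = SparkSession.builder.master("local[1]").appName("functions-doc-examples").getOrCreate()

df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',))
df2.agg(collect_list('age')).collect()   # [Row(collect_list(age)=[2, 5, 5])]
df2.agg(collect_set('age')).collect()    # e.g. [Row(collect_set(age)=[5, 2])]; element order is not guaranteed

time_df = spark.createDataFrame([('2015-04-08',)], ['dt'])
time_df.select(unix_timestamp('dt', 'yyyy-MM-dd').alias('unix_time')).collect()
# The doctest expects [Row(unix_time=1428476400)]; the exact value depends on the session time zone.

spark.stop()
```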

## How was this patch tested?

`./dev/lint-python`
`python python/pyspark/sql/functions.py`
`./python/run-tests.py --module pyspark-sql`
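For context, `python python/pyspark/sql/functions.py` drives the module's `_test()` helper, which runs every embedded example through `doctest`. A simplified sketch of what that helper does is shown below; the real version appears in the diff and this sketch only mirrors its essentials:

```python
import doctest
from pyspark.sql import Row, SparkSession
import pyspark.sql.functions

# Sketch of _test(): build the doctest globals used by the examples, then run
# the module's doctests with ELLIPSIS and NORMALIZE_WHITESPACE enabled.
spark = SparkSession.builder.master("local[4]").appName("sql.functions tests").getOrCreate()
globs = pyspark.sql.functions.__dict__.copy()
globs['sc'] = spark.sparkContext
globs['spark'] = spark
globs['df'] = spark.createDataFrame([Row(name='Alice', age=2), Row(name='Bob', age=5)])
(failure_count, test_count) = doctest.testmod(
    pyspark.sql.functions, globs=globs,
    optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
spark.stop()
```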

Author: Michael Patterson <map...@gmail.com>

Closes #17865 from map222/spark-20456.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5f02d21
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5f02d21
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5f02d21

Branch: refs/heads/master
Commit: f5f02d213d3151f58070e113d64fcded4f5d401e
Parents: a7b46c6
Author: Michael Patterson <map...@gmail.com>
Authored: Fri Jul 7 23:59:34 2017 -0700
Committer: Felix Cheung <felixche...@apache.org>
Committed: Fri Jul 7 23:59:34 2017 -0700

----------------------------------------------------------------------
 R/pkg/R/functions.R                             |  11 +-
 python/pyspark/sql/functions.py                 | 166 ++++++++++++-------
 .../scala/org/apache/spark/sql/functions.scala  |  14 +-
 3 files changed, 119 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f5f02d21/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index c529d83..f28d26a 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -336,7 +336,8 @@ setMethod("asin",
           })
 
 #' @details
-#' \code{atan}: Computes the tangent inverse of the given value.
+#' \code{atan}: Computes the tangent inverse of the given value; the returned angle is in the range
+#' -pi/2 through pi/2.
 #'
 #' @rdname column_math_functions
 #' @export
@@ -599,7 +600,7 @@ setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOr
           })
 
 #' @details
-#' \code{cos}: Computes the cosine of the given value.
+#' \code{cos}: Computes the cosine of the given value. Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases cos cos,Column-method
@@ -1407,7 +1408,7 @@ setMethod("sign", signature(x = "Column"),
           })
 
 #' @details
-#' \code{sin}: Computes the sine of the given value.
+#' \code{sin}: Computes the sine of the given value. Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases sin sin,Column-method
@@ -1597,7 +1598,7 @@ setMethod("sumDistinct",
           })
 
 #' @details
-#' \code{tan}: Computes the tangent of the given value.
+#' \code{tan}: Computes the tangent of the given value. Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases tan tan,Column-method
@@ -1896,7 +1897,7 @@ setMethod("year",
 
 #' @details
 #' \code{atan2}: Returns the angle theta from the conversion of rectangular coordinates
-#' (x, y) to polar coordinates (r, theta).
+#' (x, y) to polar coordinates (r, theta). Units in radians.
 #'
 #' @rdname column_math_functions
 #' @aliases atan2 atan2,Column-method

http://git-wip-us.apache.org/repos/asf/spark/blob/f5f02d21/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3416c4b..5d8ded8 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -67,9 +67,14 @@ def _create_window_function(name, doc=''):
     _.__doc__ = 'Window function: ' + doc
     return _
 
+_lit_doc = """
+    Creates a :class:`Column` of literal value.
 
+    >>> df.select(lit(5).alias('height')).withColumn('spark_user', lit(True)).take(1)
+    [Row(height=5, spark_user=True)]
+    """
 _functions = {
-    'lit': 'Creates a :class:`Column` of literal value.',
+    'lit': _lit_doc,
     'col': 'Returns a :class:`Column` based on the given column name.',
     'column': 'Returns a :class:`Column` based on the given column name.',
     'asc': 'Returns a sort expression based on the ascending order of the given column name.',
@@ -95,10 +100,13 @@ _functions_1_4 = {
             '0.0 through pi.',
     'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' +
             '-pi/2 through pi/2.',
-    'atan': 'Computes the tangent inverse of the given value.',
+    'atan': 'Computes the tangent inverse of the given value; the returned angle is in the range' +
+            '-pi/2 through pi/2',
     'cbrt': 'Computes the cube-root of the given value.',
     'ceil': 'Computes the ceiling of the given value.',
-    'cos': 'Computes the cosine of the given value.',
+    'cos': """Computes the cosine of the given value.
+
+           :param col: :class:`DoubleType` column, units in radians.""",
     'cosh': 'Computes the hyperbolic cosine of the given value.',
     'exp': 'Computes the exponential of the given value.',
     'expm1': 'Computes the exponential of the given value minus one.',
@@ -109,15 +117,33 @@ _functions_1_4 = {
     'rint': 'Returns the double value that is closest in value to the argument and' +
             ' is equal to a mathematical integer.',
     'signum': 'Computes the signum of the given value.',
-    'sin': 'Computes the sine of the given value.',
+    'sin': """Computes the sine of the given value.
+
+           :param col: :class:`DoubleType` column, units in radians.""",
     'sinh': 'Computes the hyperbolic sine of the given value.',
-    'tan': 'Computes the tangent of the given value.',
+    'tan': """Computes the tangent of the given value.
+
+           :param col: :class:`DoubleType` column, units in radians.""",
     'tanh': 'Computes the hyperbolic tangent of the given value.',
-    'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.',
-    'toRadians': '.. note:: Deprecated in 2.1, use radians instead.',
+    'toDegrees': '.. note:: Deprecated in 2.1, use :func:`degrees` instead.',
+    'toRadians': '.. note:: Deprecated in 2.1, use :func:`radians` instead.',
     'bitwiseNOT': 'Computes bitwise not.',
 }
 
+_collect_list_doc = """
+    Aggregate function: returns a list of objects with duplicates.
+
+    >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',))
+    >>> df2.agg(collect_list('age')).collect()
+    [Row(collect_list(age)=[2, 5, 5])]
+    """
+_collect_set_doc = """
+    Aggregate function: returns a set of objects with duplicate elements eliminated.
+
+    >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',))
+    >>> df2.agg(collect_set('age')).collect()
+    [Row(collect_set(age)=[5, 2])]
+    """
 _functions_1_6 = {
     # unary math functions
     'stddev': 'Aggregate function: returns the unbiased sample standard deviation of' +
@@ -131,9 +157,8 @@ _functions_1_6 = {
     'var_pop':  'Aggregate function: returns the population variance of the values in a group.',
     'skewness': 'Aggregate function: returns the skewness of the values in a group.',
     'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
-    'collect_list': 'Aggregate function: returns a list of objects with duplicates.',
-    'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' +
-                   ' eliminated.',
+    'collect_list': _collect_list_doc,
+    'collect_set': _collect_set_doc
 }
 
 _functions_2_1 = {
@@ -147,7 +172,7 @@ _functions_2_1 = {
 # math functions that take two arguments as input
 _binary_mathfunctions = {
     'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
-             'polar coordinates (r, theta).',
+             'polar coordinates (r, theta). Units in radians.',
     'hypot': 'Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.',
     'pow': 'Returns the value of the first argument raised to the power of the second argument.',
 }
@@ -200,17 +225,20 @@ del _name, _doc
 @since(1.3)
 def approxCountDistinct(col, rsd=None):
     """
-    .. note:: Deprecated in 2.1, use approx_count_distinct instead.
+    .. note:: Deprecated in 2.1, use :func:`approx_count_distinct` instead.
     """
     return approx_count_distinct(col, rsd)
 
 
 @since(2.1)
 def approx_count_distinct(col, rsd=None):
-    """Returns a new :class:`Column` for approximate distinct count of ``col``.
+    """Aggregate function: returns a new :class:`Column` for approximate 
distinct count of column `col`.
 
-    >>> df.agg(approx_count_distinct(df.age).alias('c')).collect()
-    [Row(c=2)]
+    :param rsd: maximum estimation error allowed (default = 0.05). For rsd < 0.01, it is more
+        efficient to use :func:`countDistinct`
+
+    >>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect()
+    [Row(distinct_ages=2)]
     """
     sc = SparkContext._active_spark_context
     if rsd is None:
@@ -267,8 +295,7 @@ def coalesce(*cols):
 
 @since(1.6)
 def corr(col1, col2):
-    """Returns a new :class:`Column` for the Pearson Correlation Coefficient 
for ``col1``
-    and ``col2``.
+    """Returns a new :class:`Column` for the Pearson Correlation Coefficient 
for ``col1`` and ``col2``.
 
     >>> a = range(20)
     >>> b = [2 * x for x in range(20)]
@@ -282,8 +309,7 @@ def corr(col1, col2):
 
 @since(2.0)
 def covar_pop(col1, col2):
-    """Returns a new :class:`Column` for the population covariance of ``col1``
-    and ``col2``.
+    """Returns a new :class:`Column` for the population covariance of ``col1`` 
and ``col2``.
 
     >>> a = [1] * 10
     >>> b = [1] * 10
@@ -297,8 +323,7 @@ def covar_pop(col1, col2):
 
 @since(2.0)
 def covar_samp(col1, col2):
-    """Returns a new :class:`Column` for the sample covariance of ``col1``
-    and ``col2``.
+    """Returns a new :class:`Column` for the sample covariance of ``col1`` and 
``col2``.
 
     >>> a = [1] * 10
     >>> b = [1] * 10
@@ -450,7 +475,7 @@ def monotonically_increasing_id():
 def nanvl(col1, col2):
     """Returns col1 if it is not NaN, or col2 if col1 is NaN.
 
-    Both inputs should be floating point columns (DoubleType or FloatType).
+    Both inputs should be floating point columns (:class:`DoubleType` or :class:`FloatType`).
 
     >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b"))
     >>> df.select(nanvl("a", "b").alias("r1"), nanvl(df.a, df.b).alias("r2")).collect()
@@ -460,10 +485,15 @@ def nanvl(col1, col2):
     return Column(sc._jvm.functions.nanvl(_to_java_column(col1), _to_java_column(col2)))
 
 
+@ignore_unicode_prefix
 @since(1.4)
 def rand(seed=None):
     """Generates a random column with independent and identically distributed 
(i.i.d.) samples
     from U[0.0, 1.0].
+
+    >>> df.withColumn('rand', rand(seed=42) * 3).collect()
+    [Row(age=2, name=u'Alice', rand=1.1568609015300986),
+     Row(age=5, name=u'Bob', rand=1.403379671529166)]
     """
     sc = SparkContext._active_spark_context
     if seed is not None:
@@ -473,10 +503,15 @@ def rand(seed=None):
     return Column(jc)
 
 
+@ignore_unicode_prefix
 @since(1.4)
 def randn(seed=None):
     """Generates a column with independent and identically distributed 
(i.i.d.) samples from
     the standard normal distribution.
+
+    >>> df.withColumn('randn', randn(seed=42)).collect()
+    [Row(age=2, name=u'Alice', randn=-0.7556247885860078),
+    Row(age=5, name=u'Bob', randn=-0.0861619008451133)]
     """
     sc = SparkContext._active_spark_context
     if seed is not None:
@@ -760,7 +795,7 @@ def ntile(n):
 @since(1.5)
 def current_date():
     """
-    Returns the current date as a date column.
+    Returns the current date as a :class:`DateType` column.
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.current_date())
@@ -768,7 +803,7 @@ def current_date():
 
 def current_timestamp():
     """
-    Returns the current timestamp as a timestamp column.
+    Returns the current timestamp as a :class:`TimestampType` column.
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.current_timestamp())
@@ -787,8 +822,8 @@ def date_format(date, format):
     .. note:: Use when ever possible specialized functions like `year`. These benefit from a
         specialized implementation.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(date_format('a', 'MM/dd/yyy').alias('date')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(date_format('dt', 'MM/dd/yyy').alias('date')).collect()
     [Row(date=u'04/08/2015')]
     """
     sc = SparkContext._active_spark_context
@@ -800,8 +835,8 @@ def year(col):
     """
     Extract the year of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(year('a').alias('year')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(year('dt').alias('year')).collect()
     [Row(year=2015)]
     """
     sc = SparkContext._active_spark_context
@@ -813,8 +848,8 @@ def quarter(col):
     """
     Extract the quarter of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(quarter('a').alias('quarter')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(quarter('dt').alias('quarter')).collect()
     [Row(quarter=2)]
     """
     sc = SparkContext._active_spark_context
@@ -826,8 +861,8 @@ def month(col):
     """
     Extract the month of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(month('a').alias('month')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(month('dt').alias('month')).collect()
     [Row(month=4)]
    """
     sc = SparkContext._active_spark_context
@@ -839,8 +874,8 @@ def dayofmonth(col):
     """
     Extract the day of the month of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(dayofmonth('a').alias('day')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(dayofmonth('dt').alias('day')).collect()
     [Row(day=8)]
     """
     sc = SparkContext._active_spark_context
@@ -852,8 +887,8 @@ def dayofyear(col):
     """
     Extract the day of the year of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(dayofyear('a').alias('day')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(dayofyear('dt').alias('day')).collect()
     [Row(day=98)]
     """
     sc = SparkContext._active_spark_context
@@ -865,8 +900,8 @@ def hour(col):
     """
     Extract the hours of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
-    >>> df.select(hour('a').alias('hour')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts'])
+    >>> df.select(hour('ts').alias('hour')).collect()
     [Row(hour=13)]
     """
     sc = SparkContext._active_spark_context
@@ -878,8 +913,8 @@ def minute(col):
     """
     Extract the minutes of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
-    >>> df.select(minute('a').alias('minute')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts'])
+    >>> df.select(minute('ts').alias('minute')).collect()
     [Row(minute=8)]
     """
     sc = SparkContext._active_spark_context
@@ -891,8 +926,8 @@ def second(col):
     """
     Extract the seconds of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
-    >>> df.select(second('a').alias('second')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts'])
+    >>> df.select(second('ts').alias('second')).collect()
     [Row(second=15)]
     """
     sc = SparkContext._active_spark_context
@@ -904,8 +939,8 @@ def weekofyear(col):
     """
     Extract the week number of a given date as integer.
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['a'])
-    >>> df.select(weekofyear(df.a).alias('week')).collect()
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(weekofyear(df.dt).alias('week')).collect()
     [Row(week=15)]
     """
     sc = SparkContext._active_spark_context
@@ -917,9 +952,9 @@ def date_add(start, days):
     """
     Returns the date that is `days` days after `start`
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['d'])
-    >>> df.select(date_add(df.d, 1).alias('d')).collect()
-    [Row(d=datetime.date(2015, 4, 9))]
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(date_add(df.dt, 1).alias('next_date')).collect()
+    [Row(next_date=datetime.date(2015, 4, 9))]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.date_add(_to_java_column(start), days))
@@ -930,9 +965,9 @@ def date_sub(start, days):
     """
     Returns the date that is `days` days before `start`
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['d'])
-    >>> df.select(date_sub(df.d, 1).alias('d')).collect()
-    [Row(d=datetime.date(2015, 4, 7))]
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(date_sub(df.dt, 1).alias('prev_date')).collect()
+    [Row(prev_date=datetime.date(2015, 4, 7))]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.date_sub(_to_java_column(start), days))
@@ -956,9 +991,9 @@ def add_months(start, months):
     """
     Returns the date that is `months` months after `start`
 
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['d'])
-    >>> df.select(add_months(df.d, 1).alias('d')).collect()
-    [Row(d=datetime.date(2015, 5, 8))]
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(add_months(df.dt, 1).alias('next_month')).collect()
+    [Row(next_month=datetime.date(2015, 5, 8))]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.add_months(_to_java_column(start), months))
@@ -969,8 +1004,8 @@ def months_between(date1, date2):
     """
     Returns the number of months between date1 and date2.
 
-    >>> df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
-    >>> df.select(months_between(df.t, df.d).alias('months')).collect()
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['date1', 'date2'])
+    >>> df.select(months_between(df.date1, df.date2).alias('months')).collect()
     [Row(months=3.9495967...)]
     """
     sc = SparkContext._active_spark_context
@@ -1073,12 +1108,17 @@ def last_day(date):
     return Column(sc._jvm.functions.last_day(_to_java_column(date)))
 
 
+@ignore_unicode_prefix
 @since(1.5)
 def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"):
     """
     Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
     representing the timestamp of that moment in the current system time zone in the given
     format.
+
+    >>> time_df = spark.createDataFrame([(1428476400,)], ['unix_time'])
+    >>> time_df.select(from_unixtime('unix_time').alias('ts')).collect()
+    [Row(ts=u'2015-04-08 00:00:00')]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.from_unixtime(_to_java_column(timestamp), format))
@@ -1092,6 +1132,10 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'):
     locale, return null if fail.
 
     if `timestamp` is None, then it returns current timestamp.
+
+    >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> time_df.select(unix_timestamp('dt', 'yyyy-MM-dd').alias('unix_time')).collect()
+    [Row(unix_time=1428476400)]
     """
     sc = SparkContext._active_spark_context
     if timestamp is None:
@@ -1106,8 +1150,8 @@ def from_utc_timestamp(timestamp, tz):
     that corresponds to the same time of day in the given timezone.
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
-    >>> df.select(from_utc_timestamp(df.t, "PST").alias('t')).collect()
-    [Row(t=datetime.datetime(1997, 2, 28, 2, 30))]
+    >>> df.select(from_utc_timestamp(df.t, "PST").alias('local_time')).collect()
+    [Row(local_time=datetime.datetime(1997, 2, 28, 2, 30))]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.from_utc_timestamp(_to_java_column(timestamp), tz))
@@ -1119,9 +1163,9 @@ def to_utc_timestamp(timestamp, tz):
     Given a timestamp, which corresponds to a certain time of day in the given timezone, returns
     another timestamp that corresponds to the same time of day in UTC.
 
-    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
-    >>> df.select(to_utc_timestamp(df.t, "PST").alias('t')).collect()
-    [Row(t=datetime.datetime(1997, 2, 28, 18, 30))]
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['ts'])
+    >>> df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect()
+    [Row(utc_time=datetime.datetime(1997, 2, 28, 18, 30))]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz))
@@ -2095,7 +2139,7 @@ def _test():
     sc = spark.sparkContext
     globs['sc'] = sc
     globs['spark'] = spark
-    globs['df'] = sc.parallelize([Row(name='Alice', age=2), Row(name='Bob', age=5)]).toDF()
+    globs['df'] = spark.createDataFrame([Row(name='Alice', age=2), Row(name='Bob', age=5)])
     (failure_count, test_count) = doctest.testmod(
         pyspark.sql.functions, globs=globs,
         optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)

http://git-wip-us.apache.org/repos/asf/spark/blob/f5f02d21/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 1263071..a5e4a44 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1321,7 +1321,8 @@ object functions {
   def asin(columnName: String): Column = asin(Column(columnName))
 
   /**
-   * Computes the tangent inverse of the given value.
+   * Computes the tangent inverse of the given column; the returned angle is 
in the range
+   * -pi/2 through pi/2
    *
    * @group math_funcs
    * @since 1.4.0
@@ -1329,7 +1330,8 @@ object functions {
   def atan(e: Column): Column = withExpr { Atan(e.expr) }
 
   /**
-   * Computes the tangent inverse of the given column.
+   * Computes the tangent inverse of the given column; the returned angle is in the range
+   * -pi/2 through pi/2
    *
    * @group math_funcs
    * @since 1.4.0
@@ -1338,7 +1340,7 @@ object functions {
 
   /**
    * Returns the angle theta from the conversion of rectangular coordinates (x, y) to
-   * polar coordinates (r, theta).
+   * polar coordinates (r, theta). Units in radians.
    *
    * @group math_funcs
    * @since 1.4.0
@@ -1470,7 +1472,7 @@ object functions {
   }
 
   /**
-   * Computes the cosine of the given value.
+   * Computes the cosine of the given value. Units in radians.
    *
    * @group math_funcs
    * @since 1.4.0
@@ -1937,7 +1939,7 @@ object functions {
   def signum(columnName: String): Column = signum(Column(columnName))
 
   /**
-   * Computes the sine of the given value.
+   * Computes the sine of the given value. Units in radians.
    *
    * @group math_funcs
    * @since 1.4.0
@@ -1969,7 +1971,7 @@ object functions {
   def sinh(columnName: String): Column = sinh(Column(columnName))
 
   /**
-   * Computes the tangent of the given value.
+   * Computes the tangent of the given value. Units in radians.
    *
    * @group math_funcs
    * @since 1.4.0

