Github user felixcheung commented on a diff in the pull request: https://github.com/apache/spark/pull/22227#discussion_r214244981 --- Diff: python/pyspark/sql/functions.py --- @@ -1669,20 +1669,36 @@ def repeat(col, n): return Column(sc._jvm.functions.repeat(_to_java_column(col), n)) -@since(1.5) +@since(2.4) @ignore_unicode_prefix -def split(str, pattern): - """ - Splits str around pattern (pattern is a regular expression). - - .. note:: pattern is a string represent the regular expression. - - >>> df = spark.createDataFrame([('ab12cd',)], ['s',]) - >>> df.select(split(df.s, '[0-9]+').alias('s')).collect() - [Row(s=[u'ab', u'cd'])] - """ - sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.split(_to_java_column(str), pattern)) +def split(str, regex, limit=-1): --- End diff -- I believe this would be a breaking API change for Python: renaming the `pattern` parameter to `regex` breaks any caller that passes it by keyword (e.g. `split(df.s, pattern='[0-9]+')`).
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org