GitHub user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/22227#discussion_r216185520 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/functions.scala --- @@ -2546,15 +2546,39 @@ object functions { def soundex(e: Column): Column = withExpr { SoundEx(e.expr) } /** - * Splits str around pattern (pattern is a regular expression). + * Splits str around matches of the given regex. * - * @note Pattern is a string representation of the regular expression. + * @param str a string expression to split + * @param regex a string representing a regular expression. The regex string should be + * a Java regular expression. * * @group string_funcs * @since 1.5.0 */ - def split(str: Column, pattern: String): Column = withExpr { - StringSplit(str.expr, lit(pattern).expr) + def split(str: Column, regex: String): Column = withExpr { + StringSplit(str.expr, Literal(regex), Literal(-1)) + } + + /** + * Splits str around matches of the given regex. + * + * @param str a string expression to split + * @param regex a string representing a regular expression. The regex string should be + * a Java regular expression. + * @param limit an integer expression which controls the number of times the regex is applied. + * <ul> + * <li>limit greater than 0: The resulting array's length will not be more than `limit`, + * and the resulting array's last entry will contain all input + * beyond the last matched regex.</li> + * <li>limit less than or equal to 0: `regex` will be applied as many times as possible, + * and the resulting array can be of any size.</li> + * </ul> --- End diff -- Let's format this multi-line description the same way as it is done in https://github.com/apache/spark/blob/e754887182304ad0d622754e33192ebcdd515965/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala#L338-L386
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org