Github user xuanyuanking commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21985#discussion_r207712639

    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala ---
    @@ -446,3 +448,88 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio
         })
       }
     }
    +
    +/**
    + * Extract all specific(idx) groups identified by a Java regex.
    + *
    + * NOTE: this expression is not THREAD-SAFE, as it has some internal mutable status.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(str, regexp[, idx]) - Extracts all groups that matches `regexp`.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_('100-200,300-400', '(\\d+)-(\\d+)', 1);
    +       [100, 300]
    +  """)
    +case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expression)
    --- End diff --

    Add an abstract class to reduce duplicated code between `RegExpExtractAll` and `RegExpExtract`?
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org