Repository: spark Updated Branches: refs/heads/master 4c6c6711d -> fc64e83f9
[SPARK-24207][R] add R API for PrefixSpan ## What changes were proposed in this pull request? add R API for PrefixSpan ## How was this patch tested? add test in test_mllib_fpm.R Author: Huaxin Gao <huax...@us.ibm.com> Closes #21710 from huaxingao/spark-24207. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc64e83f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc64e83f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc64e83f Branch: refs/heads/master Commit: fc64e83f9538d6b7e13359a4933a454ba7ed89ec Parents: 4c6c671 Author: Huaxin Gao <huax...@us.ibm.com> Authored: Sun Oct 21 12:32:43 2018 -0700 Committer: Felix Cheung <felixche...@apache.org> Committed: Sun Oct 21 12:32:43 2018 -0700 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 3 +- R/pkg/R/generics.R | 4 ++ R/pkg/R/mllib_fpm.R | 64 +++++++++++++++++++ R/pkg/tests/fulltests/test_mllib_fpm.R | 16 +++++ R/pkg/vignettes/sparkr-vignettes.Rmd | 13 ++++ docs/ml-frequent-pattern-mining.md | 53 ++++++++++++++++ docs/sparkr.md | 1 + .../examples/ml/JavaPrefixSpanExample.java | 67 ++++++++++++++++++++ .../src/main/python/ml/prefixspan_example.py | 48 ++++++++++++++ examples/src/main/r/ml/prefixSpan.R | 42 ++++++++++++ .../spark/examples/ml/PrefixSpanExample.scala | 62 ++++++++++++++++++ .../apache/spark/ml/r/PrefixSpanWrapper.scala | 34 ++++++++++ 12 files changed, 406 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index c512284..36d7a9b 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -70,7 +70,8 @@ exportMethods("glm", "spark.svmLinear", "spark.fpGrowth", "spark.freqItemsets", - "spark.associationRules") + "spark.associationRules", + 
"spark.findFrequentSequentialPatterns") # Job group lifecycle management methods export("setJobGroup", http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index d501f73..045e075 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1457,6 +1457,10 @@ setGeneric("spark.freqItemsets", function(object) { standardGeneric("spark.freqI #' @rdname spark.fpGrowth setGeneric("spark.associationRules", function(object) { standardGeneric("spark.associationRules") }) +#' @rdname spark.prefixSpan +setGeneric("spark.findFrequentSequentialPatterns", + function(data, ...) { standardGeneric("spark.findFrequentSequentialPatterns") }) + #' @param object a fitted ML model object. #' @param path the directory where the model is saved. #' @param ... additional argument(s) passed to the method. http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/R/pkg/R/mllib_fpm.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R index 4ad34fe..ac37580 100644 --- a/R/pkg/R/mllib_fpm.R +++ b/R/pkg/R/mllib_fpm.R @@ -23,6 +23,12 @@ #' @note FPGrowthModel since 2.2.0 setClass("FPGrowthModel", slots = list(jobj = "jobj")) +#' S4 class that represents a PrefixSpan +#' +#' @param jobj a Java object reference to the backing Scala PrefixSpan +#' @note PrefixSpan since 3.0.0 +setClass("PrefixSpan", slots = list(jobj = "jobj")) + #' FP-growth #' #' A parallel FP-growth algorithm to mine frequent itemsets. @@ -155,3 +161,61 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"), function(object, path, overwrite = FALSE) { write_internal(object, path, overwrite) }) + +#' PrefixSpan +#' +#' A parallel PrefixSpan algorithm to mine frequent sequential patterns. 
+#' \code{spark.findFrequentSequentialPatterns} returns a complete set of frequent sequential +#' patterns. +#' For more details, see +#' \href{https://spark.apache.org/docs/latest/mllib-frequent-pattern-mining.html#prefixspan}{ +#' PrefixSpan}. +#' +# Find frequent sequential patterns. +#' @param data A SparkDataFrame. +#' @param minSupport Minimal support level. +#' @param maxPatternLength Maximal pattern length. +#' @param maxLocalProjDBSize Maximum number of items (including delimiters used in the internal +#' storage format) allowed in a projected database before local +#' processing. +#' @param sequenceCol name of the sequence column in dataset. +#' @param ... additional argument(s) passed to the method. +#' @return A complete set of frequent sequential patterns in the input sequences of itemsets. +#' The returned \code{SparkDataFrame} contains columns of sequence and corresponding +#' frequency. The schema of it will be: +#' \code{sequence: ArrayType(ArrayType(T))} (T is the item type) +#' \code{freq: Long} +#' @rdname spark.prefixSpan +#' @aliases spark.findFrequentSequentialPatterns,SparkDataFrame-method +#' @examples +#' \dontrun{ +#' df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), +#' list(list(list(1L), list(3L, 2L), list(1L, 2L))), +#' list(list(list(1L, 2L), list(5L))), +#' list(list(list(6L)))), schema = c("sequence")) +#' frequency <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, +#' maxLocalProjDBSize = 32000000L) +#' showDF(frequency) +#' } +#' @note spark.findFrequentSequentialPatterns(SparkDataFrame) since 3.0.0 +setMethod("spark.findFrequentSequentialPatterns", + signature(data = "SparkDataFrame"), + function(data, minSupport = 0.1, maxPatternLength = 10L, + maxLocalProjDBSize = 32000000L, sequenceCol = "sequence") { + if (!is.numeric(minSupport) || minSupport < 0) { + stop("minSupport should be a number with value >= 0.") + } + if (!is.integer(maxPatternLength) || maxPatternLength <= 0) { 
+ stop("maxPatternLength should be an integer with value > 0.") + } + if (!is.numeric(maxLocalProjDBSize) || maxLocalProjDBSize <= 0) { + stop("maxLocalProjDBSize should be a number with value > 0.") + } + + jobj <- callJStatic("org.apache.spark.ml.r.PrefixSpanWrapper", "getPrefixSpan", + as.numeric(minSupport), as.integer(maxPatternLength), + as.numeric(maxLocalProjDBSize), as.character(sequenceCol)) + object <- new("PrefixSpan", jobj = jobj) + dataFrame(callJMethod(object@jobj, "findFrequentSequentialPatterns", data@sdf)) + } + ) http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/R/pkg/tests/fulltests/test_mllib_fpm.R ---------------------------------------------------------------------- diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R b/R/pkg/tests/fulltests/test_mllib_fpm.R index d80f66a..daf9ff9 100644 --- a/R/pkg/tests/fulltests/test_mllib_fpm.R +++ b/R/pkg/tests/fulltests/test_mllib_fpm.R @@ -83,4 +83,30 @@ test_that("spark.fpGrowth", { }) +test_that("spark.prefixSpan", { + df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), + list(list(list(1L), list(3L, 2L), list(1L, 2L))), + list(list(list(1L, 2L), list(5L))), + list(list(list(6L)))), schema = c("sequence")) + result1 <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, + maxLocalProjDBSize = 32000000L) + + expected_result <- createDataFrame(list(list(list(list(1L)), 3L), + list(list(list(3L)), 2L), + list(list(list(2L)), 3L), + list(list(list(1L, 2L)), 3L), + list(list(list(1L), list(3L)), 2L)), + schema = c("sequence", "freq")) + + # Do not rely on row order of the result: compare sorted "itemset|itemset:freq" keys. 
+ patternKeys <- function(sdf) { + rows <- collect(sdf) + seqs <- vapply(rows$sequence, function(s) { + paste(vapply(s, function(i) paste(unlist(i), collapse = ","), ""), collapse = "|") + }, "") + sort(paste(seqs, rows$freq, sep = ":")) + } + expect_equal(patternKeys(result1), patternKeys(expected_result)) + }) + sparkR.session.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/R/pkg/vignettes/sparkr-vignettes.Rmd ---------------------------------------------------------------------- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index ad93494..7d924ef 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -542,6 +542,7 @@ SparkR 
supports the following machine learning models and algorithms. #### Frequent Pattern Mining * FP-growth +* PrefixSpan #### Statistics @@ -998,6 +999,18 @@ We can make predictions based on the `antecedent`. head(predict(fpm, df)) ``` +#### PrefixSpan + +`spark.findFrequentSequentialPatterns` method can be used to find the complete set of frequent sequential patterns in the input sequences of itemsets. + +```{r} +df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), + list(list(list(1L), list(3L, 2L), list(1L, 2L))), + list(list(list(1L, 2L), list(5L))), + list(list(list(6L)))), schema = c("sequence")) +head(spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L)) +``` + #### Kolmogorov-Smirnov Test `spark.kstest` runs a two-sided, one-sample [Kolmogorov-Smirnov (KS) test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test). http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/docs/ml-frequent-pattern-mining.md ---------------------------------------------------------------------- diff --git a/docs/ml-frequent-pattern-mining.md b/docs/ml-frequent-pattern-mining.md index 81634de..c2043d4 100644 --- a/docs/ml-frequent-pattern-mining.md +++ b/docs/ml-frequent-pattern-mining.md @@ -85,3 +85,56 @@ Refer to the [R API docs](api/R/spark.fpGrowth.html) for more details. </div> </div> + +## PrefixSpan + +PrefixSpan is a sequential pattern mining algorithm described in +[Pei et al., Mining Sequential Patterns by Pattern-Growth: The +PrefixSpan Approach](http://dx.doi.org/10.1109%2FTKDE.2004.77). We refer +the reader to the referenced paper for formalizing the sequential +pattern mining problem. + +`spark.ml`'s PrefixSpan implementation takes the following parameters: + +* `minSupport`: the minimum support required to be considered a frequent + sequential pattern. +* `maxPatternLength`: the maximum length of a frequent sequential + pattern. Any frequent pattern exceeding this length will not be + included in the results. 
+* `maxLocalProjDBSize`: the maximum number of items allowed in a + prefix-projected database before local iterative processing of the + projected database begins. This parameter should be tuned with respect + to the size of your executors. +* `sequenceCol`: the name of the sequence column in dataset (default "sequence"), rows with + nulls in this column are ignored. + +**Examples** + +<div class="codetabs"> + +<div data-lang="scala" markdown="1"> +Refer to the [Scala API docs](api/scala/index.html#org.apache.spark.ml.fpm.PrefixSpan) for more details. + +{% include_example scala/org/apache/spark/examples/ml/PrefixSpanExample.scala %} +</div> + +<div data-lang="java" markdown="1"> +Refer to the [Java API docs](api/java/org/apache/spark/ml/fpm/PrefixSpan.html) for more details. + +{% include_example java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java %} +</div> + +<div data-lang="python" markdown="1"> +Refer to the [Python API docs](api/python/pyspark.ml.html#pyspark.ml.fpm.PrefixSpan) for more details. + +{% include_example python/ml/prefixspan_example.py %} +</div> + +<div data-lang="r" markdown="1"> + +Refer to the [R API docs](api/R/spark.prefixSpan.html) for more details. 
+ +{% include_example r/ml/prefixSpan.R %} +</div> + +</div> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/docs/sparkr.md ---------------------------------------------------------------------- diff --git a/docs/sparkr.md b/docs/sparkr.md index e6ec9ee..ba4cca8 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -510,6 +510,7 @@ SparkR supports the following machine learning algorithms currently: #### Frequent Pattern Mining * [`spark.fpGrowth`](api/R/spark.fpGrowth.html) : [`FP-growth`](ml-frequent-pattern-mining.html#fp-growth) +* [`spark.prefixSpan`](api/R/spark.prefixSpan.html) : [`PrefixSpan`](ml-frequent-pattern-mining.html#prefixspan) #### Statistics http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java new file mode 100644 index 0000000..891f306 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +// $example on$ +import java.util.Arrays; +import java.util.List; + +import org.apache.spark.ml.fpm.PrefixSpan; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.*; +// $example off$ + +/** + * An example demonstrating PrefixSpan. + * Run with + * <pre> + * bin/run-example ml.JavaPrefixSpanExample + * </pre> + */ +public class JavaPrefixSpanExample { + public static void main(String[] args) { + SparkSession spark = SparkSession + .builder() + .appName("JavaPrefixSpanExample") + .getOrCreate(); + + // $example on$ + List<Row> data = Arrays.asList( + RowFactory.create(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3))), + RowFactory.create(Arrays.asList(Arrays.asList(1), Arrays.asList(3, 2), Arrays.asList(1,2))), + RowFactory.create(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(5))), + RowFactory.create(Arrays.asList(Arrays.asList(6))) + ); + StructType schema = new StructType(new StructField[]{ new StructField( + "sequence", new ArrayType(new ArrayType(DataTypes.IntegerType, true), true), + false, Metadata.empty()) + }); + Dataset<Row> sequenceDF = spark.createDataFrame(data, schema); + + PrefixSpan prefixSpan = new PrefixSpan().setMinSupport(0.5).setMaxPatternLength(5); + + // Finding frequent sequential patterns + prefixSpan.findFrequentSequentialPatterns(sequenceDF).show(); + // $example off$ + + spark.stop(); + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/examples/src/main/python/ml/prefixspan_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/prefixspan_example.py b/examples/src/main/python/ml/prefixspan_example.py new file mode 100644 index 0000000..88d1d41 --- /dev/null +++ 
b/examples/src/main/python/ml/prefixspan_example.py @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +An example demonstrating PrefixSpan. +Run with: + bin/spark-submit examples/src/main/python/ml/prefixspan_example.py +""" +# $example on$ +from pyspark.ml.fpm import PrefixSpan +# $example off$ +from pyspark.sql import Row, SparkSession + +if __name__ == "__main__": + spark = SparkSession\ + .builder\ + .appName("PrefixSpanExample")\ + .getOrCreate() + sc = spark.sparkContext + + # $example on$ + df = sc.parallelize([Row(sequence=[[1, 2], [3]]), + Row(sequence=[[1], [3, 2], [1, 2]]), + Row(sequence=[[1, 2], [5]]), + Row(sequence=[[6]])]).toDF() + + prefixSpan = PrefixSpan(minSupport=0.5, maxPatternLength=5, + maxLocalProjDBSize=32000000) + + # Find frequent sequential patterns. 
+ prefixSpan.findFrequentSequentialPatterns(df).show() + # $example off$ + + spark.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/examples/src/main/r/ml/prefixSpan.R ---------------------------------------------------------------------- diff --git a/examples/src/main/r/ml/prefixSpan.R b/examples/src/main/r/ml/prefixSpan.R new file mode 100644 index 0000000..9b70573 --- /dev/null +++ b/examples/src/main/r/ml/prefixSpan.R @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# To run this example use +# ./bin/spark-submit examples/src/main/r/ml/prefixSpan.R + +# Load SparkR library into your R session +library(SparkR) + +# Initialize SparkSession +sparkR.session(appName = "SparkR-ML-prefixSpan-example") + +# $example on$ +# Load training data + +df <- createDataFrame(list(list(list(list(1L, 2L), list(3L))), + list(list(list(1L), list(3L, 2L), list(1L, 2L))), + list(list(list(1L, 2L), list(5L))), + list(list(list(6L)))), schema = c("sequence")) + +# Finding frequent sequential patterns +frequency <- spark.findFrequentSequentialPatterns(df, minSupport = 0.5, maxPatternLength = 5L, + maxLocalProjDBSize = 32000000L) +showDF(frequency) + +# $example off$ + +sparkR.session.stop() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala new file mode 100644 index 0000000..0a2d310 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +// scalastyle:off println + +// $example on$ +import org.apache.spark.ml.fpm.PrefixSpan +// $example off$ +import org.apache.spark.sql.SparkSession + +/** + * An example demonstrating PrefixSpan. + * Run with + * {{{ + * bin/run-example ml.PrefixSpanExample + * }}} + */ +object PrefixSpanExample { + + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder + .appName(s"${this.getClass.getSimpleName}") + .getOrCreate() + import spark.implicits._ + + // $example on$ + val smallTestData = Seq( + Seq(Seq(1, 2), Seq(3)), + Seq(Seq(1), Seq(3, 2), Seq(1, 2)), + Seq(Seq(1, 2), Seq(5)), + Seq(Seq(6))) + + val df = smallTestData.toDF("sequence") + new PrefixSpan() + .setMinSupport(0.5) + .setMaxPatternLength(5) + .setMaxLocalProjDBSize(32000000) + .findFrequentSequentialPatterns(df) + .show() + // $example off$ + + spark.stop() + } +} +// scalastyle:on println http://git-wip-us.apache.org/repos/asf/spark/blob/fc64e83f/mllib/src/main/scala/org/apache/spark/ml/r/PrefixSpanWrapper.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/PrefixSpanWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/PrefixSpanWrapper.scala new file mode 100644 index 0000000..268d596 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/r/PrefixSpanWrapper.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.r + +import org.apache.spark.ml.fpm.PrefixSpan + +private[r] object PrefixSpanWrapper { + def getPrefixSpan( + minSupport: Double, + maxPatternLength: Int, + maxLocalProjDBSize: Double, + sequenceCol: String): PrefixSpan = { + new PrefixSpan() + .setMinSupport(minSupport) + .setMaxPatternLength(maxPatternLength) + .setMaxLocalProjDBSize(maxLocalProjDBSize.toLong) + .setSequenceCol(sequenceCol) + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org