This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 886dbe0 [SPARK-36501][ML] Fix random col names in LSHModel.approxSimilarityJoin 886dbe0 is described below commit 886dbe01cdd9082f3a82bb31598e22fd4c9a7e5a Author: Tim Armstrong <tim.armstr...@databricks.com> AuthorDate: Fri Aug 13 12:04:42 2021 +0900 [SPARK-36501][ML] Fix random col names in LSHModel.approxSimilarityJoin ### What changes were proposed in this pull request? Random.nextString() can include characters that are not valid in identifiers or that are likely to trigger bugs, e.g. non-printing characters, ".", "`". Instead, use a utility that will always generate valid alphanumeric identifiers. ### Why are the changes needed? To deflake BucketedRandomProjectionLSHSuite and avoid similar failures that could be encountered by users. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Ran org.apache.spark.ml.feature.BucketedRandomProjectionLSHSuite Closes #33730 from timarmstrong/flaky-lsb. 
Authored-by: Tim Armstrong <tim.armstr...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index c330404..7963fc8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml.feature -import scala.util.Random - import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.{IntParam, ParamValidators} @@ -280,7 +278,7 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] val explodedB = if (datasetA != datasetB) { processDataset(datasetB, rightColName, explodeCols) } else { - val recreatedB = recreateCol(datasetB, $(inputCol), s"${$(inputCol)}#${Random.nextString(5)}") + val recreatedB = recreateCol(datasetB, $(inputCol), Identifiable.randomUID(inputCol.name)) processDataset(recreatedB, rightColName, explodeCols) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org