This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new a35043c  [SPARK-27591][SQL] Fix UnivocityParser for UserDefinedType
a35043c is described below

commit a35043c9e22a9bd9e372246c8d337e016736536c
Author: Artem Kalchenko <artem.kalche...@gmail.com>
AuthorDate: Wed May 1 08:27:51 2019 +0900

    [SPARK-27591][SQL] Fix UnivocityParser for UserDefinedType

    ## What changes were proposed in this pull request?

    Fix a bug in UnivocityParser: the makeConverter method didn't work correctly for UserDefinedType.

    ## How was this patch tested?

    The test suite for UnivocityParser has been extended.

    Closes #24496 from kalkolab/spark-27591.

    Authored-by: Artem Kalchenko <artem.kalche...@gmail.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 .../spark/sql/catalyst/csv/UnivocityParser.scala   |  2 +-
 .../sql/catalyst/csv/UnivocityParserSuite.scala    | 35 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index b26044e..8456b7d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -166,7 +166,7 @@ class UnivocityParser(
     case _: StringType => (d: String) =>
       nullSafeDatum(d, name, nullable, options)(UTF8String.fromString)

-    case udt: UserDefinedType[_] => (datum: String) =>
+    case udt: UserDefinedType[_] =>
       makeConverter(name, udt.sqlType, nullable)

     // We don't actually hit this exception though, we keep it for understandability
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 986de12..933c576 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -231,4 +231,39 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {

     Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalParsing)
   }
+
+  test("SPARK-27591 UserDefinedType can be read") {
+
+    @SQLUserDefinedType(udt = classOf[StringBasedUDT])
+    case class NameId(name: String, id: Int)
+
+    class StringBasedUDT extends UserDefinedType[NameId] {
+      override def sqlType: DataType = StringType
+
+      override def serialize(obj: NameId): Any = s"${obj.name}\t${obj.id}"
+
+      override def deserialize(datum: Any): NameId = datum match {
+        case s: String =>
+          val split = s.split("\t")
+          if (split.length != 2) throw new RuntimeException(s"Can't parse $s into NameId");
+          NameId(split(0), Integer.parseInt(split(1)))
+        case _ => throw new RuntimeException(s"Can't parse $datum into NameId");
+      }
+
+      override def userClass: Class[NameId] = classOf[NameId]
+    }
+
+    object StringBasedUDT extends StringBasedUDT
+
+    val input = "name\t42"
+    val expected = UTF8String.fromString(input)
+
+    val options = new CSVOptions(Map.empty[String, String], false, "GMT")
+    val parser = new UnivocityParser(StructType(Seq.empty), options)
+
+    val convertedValue = parser.makeConverter("_1", StringBasedUDT, nullable = false).apply(input)
+
+    assert(convertedValue.isInstanceOf[UTF8String])
+    assert(convertedValue == expected)
+  }
 }
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
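
[Editor's context, not part of the commit above] The one-line fix removes an extra "(datum: String) =>" wrapper, which had made the UserDefinedType branch return a converter-producing function instead of the converted value; delegating straight to the converter of the underlying sqlType yields the value itself. Below is a minimal, self-contained Scala sketch of that shape; the names ConverterSketch, UdtType, and IntType are illustrative only and are not Spark APIs.

object ConverterSketch {
  // A converter is expected to turn a raw CSV field into a parsed value.
  type ValueConverter = String => Any

  sealed trait DataType
  case object IntType extends DataType
  // Stand-in for a UDT whose wire format is its underlying sqlType.
  final case class UdtType(sqlType: DataType) extends DataType

  def makeConverter(dt: DataType): ValueConverter = dt match {
    case IntType => (d: String) => d.toInt
    // Before the fix, the UDT branch conceptually read:
    //   case UdtType(underlying) => (d: String) => makeConverter(underlying)
    // which still type-checks (String => Any) but returns a function, not the value.
    // After the fix, it delegates directly to the underlying type's converter:
    case UdtType(underlying) => makeConverter(underlying)
  }

  def main(args: Array[String]): Unit = {
    val convert = makeConverter(UdtType(IntType))
    assert(convert("42") == 42) // the parsed value, not a leftover lambda
    println(convert("42"))
  }
}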