Repository: spark
Updated Branches:
  refs/heads/branch-1.0 16c8d562d -> e61149dd0
[SPARK-2504][SQL] Fix nullability of Substring expression.

This is a follow-up of #1359 with nullability narrowing.

Author: Takuya UESHIN <ues...@happy-camper.st>

Closes #1426 from ueshin/issues/SPARK-2504 and squashes the following commits:

5157832 [Takuya UESHIN] Remove unnecessary white spaces.
80958ac [Takuya UESHIN] Fix nullability of Substring expression.

(cherry picked from commit 632fb3d9a9ebb3d2218385403145d5b89c41c025)
Signed-off-by: Reynold Xin <r...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e61149dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e61149dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e61149dd

Branch: refs/heads/branch-1.0
Commit: e61149dd0cc525b797f63463ff3c3eff7f6d0c3a
Parents: 16c8d56
Author: Takuya UESHIN <ues...@happy-camper.st>
Authored: Tue Jul 15 22:43:48 2014 -0700
Committer: Reynold Xin <r...@apache.org>
Committed: Tue Jul 15 22:43:59 2014 -0700

----------------------------------------------------------------------
 .../catalyst/expressions/stringOperations.scala | 24 ++++++++++----------
 .../expressions/ExpressionEvaluationSuite.scala | 14 ++++++++----
 2 files changed, 22 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e61149dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 4bd7bf5..f1b27c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -215,19 +215,19 @@ case class EndsWith(left: Expression, right: Expression)
 
 case class Substring(str: Expression, pos: Expression, len: Expression) extends Expression {
   type EvaluatedType = Any
-  
-  def nullable: Boolean = true
+
+  def nullable: Boolean = str.nullable || pos.nullable || len.nullable
   def dataType: DataType = {
     if (!resolved) {
       throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved")
     }
     if (str.dataType == BinaryType) str.dataType else StringType
   }
-  
+
   def references = children.flatMap(_.references).toSet
-  
+
   override def children = str :: pos :: len :: Nil
-  
+
   @inline
   def slice[T, C <: Any](str: C, startPos: Int, sliceLen: Int)
       (implicit ev: (C=>IndexedSeqOptimized[T,_])): Any = {
@@ -237,40 +237,40 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends
     // refers to element i-1 in the sequence. If a start index i is less than 0, it refers
     // to the -ith element before the end of the sequence. If a start index i is 0, it
     // refers to the first element.
-    
+
     val start = startPos match {
       case pos if pos > 0 => pos - 1
       case neg if neg < 0 => len + neg
       case _ => 0
     }
-    
+
     val end = sliceLen match {
       case max if max == Integer.MAX_VALUE => max
       case x => start + x
     }
-    
+
     str.slice(start, end)
   }
-  
+
   override def eval(input: Row): Any = {
     val string = str.eval(input)
     val po = pos.eval(input)
     val ln = len.eval(input)
-    
+
     if ((string == null) || (po == null) || (ln == null)) {
       null
     } else {
       val start = po.asInstanceOf[Int]
       val length = ln.asInstanceOf[Int]
-      
+
       string match {
         case ba: Array[Byte] => slice(ba, start, length)
         case other => slice(other.toString, start, length)
       }
     }
   }
-  
+
   override def toString = len match {
     case max if max == Integer.MAX_VALUE => s"SUBSTR($str, $pos)"
     case _ => s"SUBSTR($str, $pos, $len)"

http://git-wip-us.apache.org/repos/asf/spark/blob/e61149dd/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index f1d7aed..143330b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -469,9 +469,9 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   test("Substring") {
     val row = new GenericRow(Array[Any]("example", "example".toArray.map(_.toByte)))
-    
+
     val s = 'a.string.at(0)
-    
+
     // substring from zero position with less-than-full length
     checkEvaluation(Substring(s, Literal(0, IntegerType), Literal(2, IntegerType)), "ex", row)
     checkEvaluation(Substring(s, Literal(1, IntegerType), Literal(2, IntegerType)), "ex", row)
@@ -501,7 +501,7 @@ class ExpressionEvaluationSuite extends FunSuite {
     // substring(null, _, _) -> null
     checkEvaluation(Substring(s, Literal(100, IntegerType), Literal(4, IntegerType)), null,
       new GenericRow(Array[Any](null)))
-    
+
     // substring(_, null, _) -> null
     checkEvaluation(Substring(s, Literal(null, IntegerType), Literal(4, IntegerType)), null, row)
@@ -514,6 +514,12 @@ class ExpressionEvaluationSuite extends FunSuite {
     // 2-arg substring from nonzero position
     checkEvaluation(Substring(s, Literal(2, IntegerType), Literal(Integer.MAX_VALUE, IntegerType)),
       "xample", row)
+
+    val s_notNull = 'a.string.notNull.at(0)
+
+    assert(Substring(s, Literal(0, IntegerType), Literal(2, IntegerType)).nullable === true)
+    assert(Substring(s_notNull, Literal(0, IntegerType), Literal(2, IntegerType)).nullable === false)
+    assert(Substring(s_notNull, Literal(null, IntegerType), Literal(2, IntegerType)).nullable === true)
+    assert(Substring(s_notNull, Literal(0, IntegerType), Literal(null, IntegerType)).nullable === true)
   }
 }
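----------------------------------------------------------------------
Illustrative sketch (not part of the commit): a minimal, self-contained
Scala illustration of the nullability narrowing above. The Expr, Lit and
Substr names are hypothetical stand-ins rather than the real Catalyst
classes; the point is only that, as in the patched Substring, an
expression reports nullable = true only when at least one of its
children can produce null.

// Hypothetical stand-in types; not the org.apache.spark.sql.catalyst API.
trait Expr {
  def nullable: Boolean
}

// A literal that records whether it may evaluate to null.
case class Lit(value: Any, nullable: Boolean) extends Expr

// Before the patch the real Substring reported nullable = true
// unconditionally; afterwards it is nullable only if any child is.
case class Substr(str: Expr, pos: Expr, len: Expr) extends Expr {
  def nullable: Boolean = str.nullable || pos.nullable || len.nullable
}

object NullabilityDemo extends App {
  val s       = Lit("example", nullable = false)
  val two     = Lit(2, nullable = false)
  val nullInt = Lit(null, nullable = true)

  // All children non-nullable => the expression is statically non-nullable.
  assert(!Substr(s, two, two).nullable)

  // Any nullable child keeps the whole expression nullable, mirroring the
  // new assertions added to ExpressionEvaluationSuite.
  assert(Substr(s, nullInt, two).nullable)
  assert(Substr(s, two, nullInt).nullable)

  println("nullability narrows as in the patched Substring")
}
----------------------------------------------------------------------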