Repository: spark
Updated Branches:
  refs/heads/master 3d2134fc0 -> 6e1e2eba6


[SPARK-8240][SQL] string function: concat

Author: Reynold Xin <r...@databricks.com>

Closes #7486 from rxin/concat and squashes the following commits:

5217d6e [Reynold Xin] Removed Hive's concat test.
f5cb7a3 [Reynold Xin] Concat is never nullable.
ae4e61f [Reynold Xin] Removed extra import.
fddcbbd [Reynold Xin] Fixed NPE.
22e831c [Reynold Xin] Added missing file.
57a2352 [Reynold Xin] [SPARK-8240][SQL] string function: concat


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e1e2eba
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e1e2eba
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e1e2eba

Branch: refs/heads/master
Commit: 6e1e2eba696e89ba57bf5450b9c72c4386e43dc8
Parents: 3d2134f
Author: Reynold Xin <r...@databricks.com>
Authored: Sat Jul 18 14:07:56 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Sat Jul 18 14:07:56 2015 -0700

----------------------------------------------------------------------
 .../catalyst/analysis/FunctionRegistry.scala    |   1 +
 .../catalyst/expressions/stringOperations.scala |  37 ++
 .../expressions/StringExpressionsSuite.scala    | 484 +++++++++++++++++++
 .../expressions/StringFunctionsSuite.scala      | 462 ------------------
 .../scala/org/apache/spark/sql/functions.scala  |  22 +
 .../spark/sql/DataFrameFunctionsSuite.scala     | 242 ----------
 .../apache/spark/sql/StringFunctionsSuite.scala | 284 +++++++++++
 .../hive/execution/HiveCompatibilitySuite.scala |   4 +-
 .../apache/spark/unsafe/types/UTF8String.java   |  40 +-
 .../spark/unsafe/types/UTF8StringSuite.java     |  14 +
 10 files changed, 882 insertions(+), 708 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index ce552a1..d1cda6b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -152,6 +152,7 @@ object FunctionRegistry {
     // string functions
     expression[Ascii]("ascii"),
     expression[Base64]("base64"),
+    expression[Concat]("concat"),
     expression[Encode]("encode"),
     expression[Decode]("decode"),
     expression[FormatNumber]("format_number"),

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index c64afe7..b36354e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -27,6 +27,43 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// This file defines expressions for string operations.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+/**
+ * An expression that concatenates its input strings, in order, into a single string.
+ * Inputs that evaluate to null are skipped rather than making the whole result null.
+ *
+ * For example, `concat("a", null, "b")` evaluates to `"ab"`.
+ *
+ * Every child is declared (via `inputTypes`) to be of `StringType`, and the result is
+ * a `StringType` value.
+ *
+ * Note that this differs from Hive, which outputs null if any input is null;
+ * this expression never outputs null, which is why `nullable` is always false.
+ */
+case class Concat(children: Seq[Expression]) extends Expression with 
ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = 
Seq.fill(children.size)(StringType)  // one StringType entry per child
+  override def dataType: DataType = StringType
+
+  override def nullable: Boolean = false  // null inputs are skipped, never propagated
+  override def foldable: Boolean = children.forall(_.foldable)  // foldable iff every child is
+
+  override def eval(input: InternalRow): Any = {
+    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])  // null children stay null
+    UTF8String.concat(inputs : _*)  // nulls are passed through; concat is expected to skip them
+  }
+
+  override protected def genCode(ctx: CodeGenContext, ev: 
GeneratedExpressionCode): String = {
+    val evals = children.map(_.gen(ctx))  // generated code for each child expression
+    val inputs = evals.map { eval => s"${eval.isNull} ? null : 
${eval.primitive}" }.mkString(", ")  // one null-guarded argument per child
+    evals.map(_.code).mkString("\n") + s"""
+      boolean ${ev.isNull} = false;
+      UTF8String ${ev.primitive} = UTF8String.concat($inputs);
+    """
+  }
+}
+
 
 trait StringRegexExpression extends ImplicitCastInputTypes {
   self: BinaryExpression =>

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
new file mode 100644
index 0000000..0ed567a
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -0,0 +1,484 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.types._
+
+
+class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
+
+  test("concat") {
+    def testConcat(inputs: String*): Unit = {
+      val expected = inputs.filter(_ != null).mkString
+      checkEvaluation(Concat(inputs.map(Literal.create(_, StringType))), 
expected, EmptyRow)
+    }
+
+    testConcat()
+    testConcat(null)
+    testConcat("")
+    testConcat("ab")
+    testConcat("a", "b")
+    testConcat("a", "b", "C")
+    testConcat("a", null, "C")
+    testConcat("a", null, null)
+    testConcat(null, null, null)
+
+    // scalastyle:off
+    // non ascii characters are not allowed in the code, so we disable the 
scalastyle here.
+    testConcat("数据", null, "砖头")
+    // scalastyle:on
+  }
+
+  test("StringComparison") {
+    val row = create_row("abc", null)
+    val c1 = 'a.string.at(0)
+    val c2 = 'a.string.at(1)
+
+    checkEvaluation(c1 contains "b", true, row)
+    checkEvaluation(c1 contains "x", false, row)
+    checkEvaluation(c2 contains "b", null, row)
+    checkEvaluation(c1 contains Literal.create(null, StringType), null, row)
+
+    checkEvaluation(c1 startsWith "a", true, row)
+    checkEvaluation(c1 startsWith "b", false, row)
+    checkEvaluation(c2 startsWith "a", null, row)
+    checkEvaluation(c1 startsWith Literal.create(null, StringType), null, row)
+
+    checkEvaluation(c1 endsWith "c", true, row)
+    checkEvaluation(c1 endsWith "b", false, row)
+    checkEvaluation(c2 endsWith "b", null, row)
+    checkEvaluation(c1 endsWith Literal.create(null, StringType), null, row)
+  }
+
+  test("Substring") {
+    val row = create_row("example", "example".toArray.map(_.toByte))
+
+    val s = 'a.string.at(0)
+
+    // substring from zero position with less-than-full length
+    checkEvaluation(
+      Substring(s, Literal.create(0, IntegerType), Literal.create(2, 
IntegerType)), "ex", row)
+    checkEvaluation(
+      Substring(s, Literal.create(1, IntegerType), Literal.create(2, 
IntegerType)), "ex", row)
+
+    // substring from zero position with full length
+    checkEvaluation(
+      Substring(s, Literal.create(0, IntegerType), Literal.create(7, 
IntegerType)), "example", row)
+    checkEvaluation(
+      Substring(s, Literal.create(1, IntegerType), Literal.create(7, 
IntegerType)), "example", row)
+
+    // substring from zero position with greater-than-full length
+    checkEvaluation(Substring(s, Literal.create(0, IntegerType), 
Literal.create(100, IntegerType)),
+      "example", row)
+    checkEvaluation(Substring(s, Literal.create(1, IntegerType), 
Literal.create(100, IntegerType)),
+      "example", row)
+
+    // substring from nonzero position with less-than-full length
+    checkEvaluation(Substring(s, Literal.create(2, IntegerType), 
Literal.create(2, IntegerType)),
+      "xa", row)
+
+    // substring from nonzero position with full length
+    checkEvaluation(Substring(s, Literal.create(2, IntegerType), 
Literal.create(6, IntegerType)),
+      "xample", row)
+
+    // substring from nonzero position with greater-than-full length
+    checkEvaluation(Substring(s, Literal.create(2, IntegerType), 
Literal.create(100, IntegerType)),
+      "xample", row)
+
+    // zero-length substring (within string bounds)
+    checkEvaluation(Substring(s, Literal.create(0, IntegerType), 
Literal.create(0, IntegerType)),
+      "", row)
+
+    // zero-length substring (beyond string bounds)
+    checkEvaluation(Substring(s, Literal.create(100, IntegerType), 
Literal.create(4, IntegerType)),
+      "", row)
+
+    // substring(null, _, _) -> null
+    checkEvaluation(Substring(s, Literal.create(100, IntegerType), 
Literal.create(4, IntegerType)),
+      null, create_row(null))
+
+    // substring(_, null, _) -> null
+    checkEvaluation(Substring(s, Literal.create(null, IntegerType), 
Literal.create(4, IntegerType)),
+      null, row)
+
+    // substring(_, _, null) -> null
+    checkEvaluation(
+      Substring(s, Literal.create(100, IntegerType), Literal.create(null, 
IntegerType)),
+      null,
+      row)
+
+    // 2-arg substring from zero position
+    checkEvaluation(
+      Substring(s, Literal.create(0, IntegerType), 
Literal.create(Integer.MAX_VALUE, IntegerType)),
+      "example",
+      row)
+    checkEvaluation(
+      Substring(s, Literal.create(1, IntegerType), 
Literal.create(Integer.MAX_VALUE, IntegerType)),
+      "example",
+      row)
+
+    // 2-arg substring from nonzero position
+    checkEvaluation(
+      Substring(s, Literal.create(2, IntegerType), 
Literal.create(Integer.MAX_VALUE, IntegerType)),
+      "xample",
+      row)
+
+    val s_notNull = 'a.string.notNull.at(0)
+
+    assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, 
IntegerType)).nullable
+      === true)
+    assert(
+      Substring(s_notNull, Literal.create(0, IntegerType), Literal.create(2, 
IntegerType)).nullable
+        === false)
+    assert(Substring(s_notNull,
+      Literal.create(null, IntegerType), Literal.create(2, 
IntegerType)).nullable === true)
+    assert(Substring(s_notNull,
+      Literal.create(0, IntegerType), Literal.create(null, 
IntegerType)).nullable === true)
+
+    checkEvaluation(s.substr(0, 2), "ex", row)
+    checkEvaluation(s.substr(0), "example", row)
+    checkEvaluation(s.substring(0, 2), "ex", row)
+    checkEvaluation(s.substring(0), "example", row)
+  }
+
+  test("LIKE literal Regular Expression") {
+    checkEvaluation(Literal.create(null, StringType).like("a"), null)
+    checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, 
StringType)), null)
+    checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, 
StringType)), null)
+    checkEvaluation("abdef" like "abdef", true)
+    checkEvaluation("a_%b" like "a\\__b", true)
+    checkEvaluation("addb" like "a_%b", true)
+    checkEvaluation("addb" like "a\\__b", false)
+    checkEvaluation("addb" like "a%\\%b", false)
+    checkEvaluation("a_%b" like "a%\\%b", true)
+    checkEvaluation("addb" like "a%", true)
+    checkEvaluation("addb" like "**", false)
+    checkEvaluation("abc" like "a%", true)
+    checkEvaluation("abc"  like "b%", false)
+    checkEvaluation("abc"  like "bc%", false)
+    checkEvaluation("a\nb" like "a_b", true)
+    checkEvaluation("ab" like "a%b", true)
+    checkEvaluation("a\nb" like "a%b", true)
+  }
+
+  test("LIKE Non-literal Regular Expression") {
+    val regEx = 'a.string.at(0)
+    checkEvaluation("abcd" like regEx, null, create_row(null))
+    checkEvaluation("abdef" like regEx, true, create_row("abdef"))
+    checkEvaluation("a_%b" like regEx, true, create_row("a\\__b"))
+    checkEvaluation("addb" like regEx, true, create_row("a_%b"))
+    checkEvaluation("addb" like regEx, false, create_row("a\\__b"))
+    checkEvaluation("addb" like regEx, false, create_row("a%\\%b"))
+    checkEvaluation("a_%b" like regEx, true, create_row("a%\\%b"))
+    checkEvaluation("addb" like regEx, true, create_row("a%"))
+    checkEvaluation("addb" like regEx, false, create_row("**"))
+    checkEvaluation("abc" like regEx, true, create_row("a%"))
+    checkEvaluation("abc" like regEx, false, create_row("b%"))
+    checkEvaluation("abc" like regEx, false, create_row("bc%"))
+    checkEvaluation("a\nb" like regEx, true, create_row("a_b"))
+    checkEvaluation("ab" like regEx, true, create_row("a%b"))
+    checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
+
+    checkEvaluation(Literal.create(null, StringType) like regEx, null, 
create_row("bc%"))
+  }
+
+  test("RLIKE literal Regular Expression") {
+    checkEvaluation(Literal.create(null, StringType) rlike "abdef", null)
+    checkEvaluation("abdef" rlike Literal.create(null, StringType), null)
+    checkEvaluation(Literal.create(null, StringType) rlike 
Literal.create(null, StringType), null)
+    checkEvaluation("abdef" rlike "abdef", true)
+    checkEvaluation("abbbbc" rlike "a.*c", true)
+
+    checkEvaluation("fofo" rlike "^fo", true)
+    checkEvaluation("fo\no" rlike "^fo\no$", true)
+    checkEvaluation("Bn" rlike "^Ba*n", true)
+    checkEvaluation("afofo" rlike "fo", true)
+    checkEvaluation("afofo" rlike "^fo", false)
+    checkEvaluation("Baan" rlike "^Ba?n", false)
+    checkEvaluation("axe" rlike "pi|apa", false)
+    checkEvaluation("pip" rlike "^(pi)*$", false)
+
+    checkEvaluation("abc"  rlike "^ab", true)
+    checkEvaluation("abc"  rlike "^bc", false)
+    checkEvaluation("abc"  rlike "^ab", true)
+    checkEvaluation("abc"  rlike "^bc", false)
+
+    intercept[java.util.regex.PatternSyntaxException] {
+      evaluate("abbbbc" rlike "**")
+    }
+  }
+
+  test("RLIKE Non-literal Regular Expression") {
+    val regEx = 'a.string.at(0)
+    checkEvaluation("abdef" rlike regEx, true, create_row("abdef"))
+    checkEvaluation("abbbbc" rlike regEx, true, create_row("a.*c"))
+    checkEvaluation("fofo" rlike regEx, true, create_row("^fo"))
+    checkEvaluation("fo\no" rlike regEx, true, create_row("^fo\no$"))
+    checkEvaluation("Bn" rlike regEx, true, create_row("^Ba*n"))
+
+    intercept[java.util.regex.PatternSyntaxException] {
+      evaluate("abbbbc" rlike regEx, create_row("**"))
+    }
+  }
+
+  test("ascii for string") {
+    val a = 'a.string.at(0)
+    checkEvaluation(Ascii(Literal("efg")), 101, create_row("abdef"))
+    checkEvaluation(Ascii(a), 97, create_row("abdef"))
+    checkEvaluation(Ascii(a), 0, create_row(""))
+    checkEvaluation(Ascii(a), null, create_row(null))
+    checkEvaluation(Ascii(Literal.create(null, StringType)), null, 
create_row("abdef"))
+  }
+
+  test("base64/unbase64 for string") {
+    val a = 'a.string.at(0)
+    val b = 'b.binary.at(0)
+    val bytes = Array[Byte](1, 2, 3, 4)
+
+    checkEvaluation(Base64(Literal(bytes)), "AQIDBA==", create_row("abdef"))
+    checkEvaluation(Base64(UnBase64(Literal("AQIDBA=="))), "AQIDBA==", 
create_row("abdef"))
+    checkEvaluation(Base64(UnBase64(Literal(""))), "", create_row("abdef"))
+    checkEvaluation(Base64(UnBase64(Literal.create(null, StringType))), null, 
create_row("abdef"))
+    checkEvaluation(Base64(UnBase64(a)), "AQIDBA==", create_row("AQIDBA=="))
+
+    checkEvaluation(Base64(b), "AQIDBA==", create_row(bytes))
+    checkEvaluation(Base64(b), "", create_row(Array[Byte]()))
+    checkEvaluation(Base64(b), null, create_row(null))
+    checkEvaluation(Base64(Literal.create(null, StringType)), null, 
create_row("abdef"))
+
+    checkEvaluation(UnBase64(a), null, create_row(null))
+    checkEvaluation(UnBase64(Literal.create(null, StringType)), null, 
create_row("abdef"))
+  }
+
+  test("encode/decode for string") {
+    val a = 'a.string.at(0)
+    val b = 'b.binary.at(0)
+    // scalastyle:off
+    // non ascii characters are not allowed in the code, so we disable the 
scalastyle here.
+    checkEvaluation(
+      Decode(Encode(Literal("大千世界"), Literal("UTF-16LE")), 
Literal("UTF-16LE")), "大千世界")
+    checkEvaluation(
+      Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", 
create_row("大千世界"))
+    checkEvaluation(
+      Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "", 
create_row(""))
+    // scalastyle:on
+    checkEvaluation(Encode(a, Literal("utf-8")), null, create_row(null))
+    checkEvaluation(Encode(Literal.create(null, StringType), 
Literal("utf-8")), null)
+    checkEvaluation(Encode(a, Literal.create(null, StringType)), null, 
create_row(""))
+
+    checkEvaluation(Decode(b, Literal("utf-8")), null, create_row(null))
+    checkEvaluation(Decode(Literal.create(null, BinaryType), 
Literal("utf-8")), null)
+    checkEvaluation(Decode(b, Literal.create(null, StringType)), null, 
create_row(null))
+  }
+
+  test("Levenshtein distance") {
+    checkEvaluation(Levenshtein(Literal.create(null, StringType), 
Literal("")), null)
+    checkEvaluation(Levenshtein(Literal(""), Literal.create(null, 
StringType)), null)
+    checkEvaluation(Levenshtein(Literal(""), Literal("")), 0)
+    checkEvaluation(Levenshtein(Literal("abc"), Literal("abc")), 0)
+    checkEvaluation(Levenshtein(Literal("kitten"), Literal("sitting")), 3)
+    checkEvaluation(Levenshtein(Literal("frog"), Literal("fog")), 1)
+    // scalastyle:off
+    // non ascii characters are not allowed in the code, so we disable the 
scalastyle here.
+    checkEvaluation(Levenshtein(Literal("千世"), Literal("fog")), 3)
+    checkEvaluation(Levenshtein(Literal("世界千世"), Literal("大a界b")), 
4)
+    // scalastyle:on
+  }
+
+  test("TRIM/LTRIM/RTRIM") {
+    val s = 'a.string.at(0)
+    checkEvaluation(StringTrim(Literal(" aa  ")), "aa", create_row(" abdef "))
+    checkEvaluation(StringTrim(s), "abdef", create_row(" abdef "))
+
+    checkEvaluation(StringTrimLeft(Literal(" aa  ")), "aa  ", create_row(" 
abdef "))
+    checkEvaluation(StringTrimLeft(s), "abdef ", create_row(" abdef "))
+
+    checkEvaluation(StringTrimRight(Literal(" aa  ")), " aa", create_row(" 
abdef "))
+    checkEvaluation(StringTrimRight(s), " abdef", create_row(" abdef "))
+
+    // scalastyle:off
+    // non ascii characters are not allowed in the source code, so we disable 
the scalastyle.
+    checkEvaluation(StringTrimRight(s), "  花花世界", create_row("  
花花世界 "))
+    checkEvaluation(StringTrimLeft(s), "花花世界 ", create_row("  
花花世界 "))
+    checkEvaluation(StringTrim(s), "花花世界", create_row("  花花世界 
"))
+    // scalastyle:on
+  }
+
+  test("FORMAT") {
+    val f = 'f.string.at(0)
+    val d1 = 'd.int.at(1)
+    val s1 = 's.int.at(2)
+
+    val row1 = create_row("aa%d%s", 12, "cc")
+    val row2 = create_row(null, 12, "cc")
+    checkEvaluation(StringFormat(Literal("aa%d%s"), Literal(123), 
Literal("a")), "aa123a", row1)
+    checkEvaluation(StringFormat(Literal("aa")), "aa", create_row(null))
+    checkEvaluation(StringFormat(Literal("aa%d%s"), Literal(123), 
Literal("a")), "aa123a", row1)
+
+    checkEvaluation(StringFormat(f, d1, s1), "aa12cc", row1)
+    checkEvaluation(StringFormat(f, d1, s1), null, row2)
+  }
+
+  test("INSTR") {
+    val s1 = 'a.string.at(0)
+    val s2 = 'b.string.at(1)
+    val s3 = 'c.string.at(2)
+    val row1 = create_row("aaads", "aa", "zz")
+
+    checkEvaluation(StringInstr(Literal("aaads"), Literal("aa")), 1, row1)
+    checkEvaluation(StringInstr(Literal("aaads"), Literal("de")), 0, row1)
+    checkEvaluation(StringInstr(Literal.create(null, StringType), 
Literal("de")), null, row1)
+    checkEvaluation(StringInstr(Literal("aaads"), Literal.create(null, 
StringType)), null, row1)
+
+    checkEvaluation(StringInstr(s1, s2), 1, row1)
+    checkEvaluation(StringInstr(s1, s3), 0, row1)
+
+    // scalastyle:off
+    // non ascii characters are not allowed in the source code, so we disable 
the scalastyle.
+    checkEvaluation(StringInstr(s1, s2), 3, create_row("花花世界", 
"世界"))
+    checkEvaluation(StringInstr(s1, s2), 1, create_row("花花世界", "花"))
+    checkEvaluation(StringInstr(s1, s2), 0, create_row("花花世界", "小"))
+    // scalastyle:on
+  }
+
+  test("LOCATE") {
+    val s1 = 'a.string.at(0)
+    val s2 = 'b.string.at(1)
+    val s3 = 'c.string.at(2)
+    val s4 = 'd.int.at(3)
+    val row1 = create_row("aaads", "aa", "zz", 1)
+
+    checkEvaluation(new StringLocate(Literal("aa"), Literal("aaads")), 1, row1)
+    checkEvaluation(StringLocate(Literal("aa"), Literal("aaads"), Literal(1)), 
2, row1)
+    checkEvaluation(StringLocate(Literal("aa"), Literal("aaads"), Literal(2)), 
0, row1)
+    checkEvaluation(new StringLocate(Literal("de"), Literal("aaads")), 0, row1)
+    checkEvaluation(StringLocate(Literal("de"), Literal("aaads"), 1), 0, row1)
+
+    checkEvaluation(new StringLocate(s2, s1), 1, row1)
+    checkEvaluation(StringLocate(s2, s1, s4), 2, row1)
+    checkEvaluation(new StringLocate(s3, s1), 0, row1)
+    checkEvaluation(StringLocate(s3, s1, Literal.create(null, IntegerType)), 
0, row1)
+  }
+
+  test("LPAD/RPAD") {
+    val s1 = 'a.string.at(0)
+    val s2 = 'b.int.at(1)
+    val s3 = 'c.string.at(2)
+    val row1 = create_row("hi", 5, "??")
+    val row2 = create_row("hi", 1, "?")
+    val row3 = create_row(null, 1, "?")
+
+    checkEvaluation(StringLPad(Literal("hi"), Literal(5), Literal("??")), 
"???hi", row1)
+    checkEvaluation(StringLPad(Literal("hi"), Literal(1), Literal("??")), "h", 
row1)
+    checkEvaluation(StringLPad(s1, s2, s3), "???hi", row1)
+    checkEvaluation(StringLPad(s1, s2, s3), "h", row2)
+    checkEvaluation(StringLPad(s1, s2, s3), null, row3)
+
+    checkEvaluation(StringRPad(Literal("hi"), Literal(5), Literal("??")), 
"hi???", row1)
+    checkEvaluation(StringRPad(Literal("hi"), Literal(1), Literal("??")), "h", 
row1)
+    checkEvaluation(StringRPad(s1, s2, s3), "hi???", row1)
+    checkEvaluation(StringRPad(s1, s2, s3), "h", row2)
+    checkEvaluation(StringRPad(s1, s2, s3), null, row3)
+  }
+
+  test("REPEAT") {
+    val s1 = 'a.string.at(0)
+    val s2 = 'b.int.at(1)
+    val row1 = create_row("hi", 2)
+    val row2 = create_row(null, 1)
+
+    checkEvaluation(StringRepeat(Literal("hi"), Literal(2)), "hihi", row1)
+    checkEvaluation(StringRepeat(Literal("hi"), Literal(-1)), "", row1)
+    checkEvaluation(StringRepeat(s1, s2), "hihi", row1)
+    checkEvaluation(StringRepeat(s1, s2), null, row2)
+  }
+
+  test("REVERSE") {
+    val s = 'a.string.at(0)
+    val row1 = create_row("abccc")
+    checkEvaluation(StringReverse(Literal("abccc")), "cccba", row1)
+    checkEvaluation(StringReverse(s), "cccba", row1)
+  }
+
+  test("SPACE") {
+    val s1 = 'b.int.at(0)
+    val row1 = create_row(2)
+    val row2 = create_row(null)
+
+    checkEvaluation(StringSpace(Literal(2)), "  ", row1)
+    checkEvaluation(StringSpace(Literal(-1)), "", row1)
+    checkEvaluation(StringSpace(Literal(0)), "", row1)
+    checkEvaluation(StringSpace(s1), "  ", row1)
+    checkEvaluation(StringSpace(s1), null, row2)
+  }
+
+  test("SPLIT") {
+    val s1 = 'a.string.at(0)
+    val s2 = 'b.string.at(1)
+    val row1 = create_row("aa2bb3cc", "[1-9]+")
+
+    checkEvaluation(
+      StringSplit(Literal("aa2bb3cc"), Literal("[1-9]+")), Seq("aa", "bb", 
"cc"), row1)
+    checkEvaluation(
+      StringSplit(s1, s2), Seq("aa", "bb", "cc"), row1)
+  }
+
+  test("length for string / binary") {
+    val a = 'a.string.at(0)
+    val b = 'b.binary.at(0)
+    val bytes = Array[Byte](1, 2, 3, 1, 2)
+    val string = "abdef"
+
+    // scalastyle:off
+    // non ascii characters are not allowed in the source code, so we disable 
the scalastyle.
+    checkEvaluation(Length(Literal("a花花c")), 4, create_row(string))
+    // scalastyle:on
+    checkEvaluation(Length(Literal(bytes)), 5, create_row(Array[Byte]()))
+
+    checkEvaluation(Length(a), 5, create_row(string))
+    checkEvaluation(Length(b), 5, create_row(bytes))
+
+    checkEvaluation(Length(a), 0, create_row(""))
+    checkEvaluation(Length(b), 0, create_row(Array[Byte]()))
+
+    checkEvaluation(Length(a), null, create_row(null))
+    checkEvaluation(Length(b), null, create_row(null))
+
+    checkEvaluation(Length(Literal.create(null, StringType)), null, 
create_row(string))
+    checkEvaluation(Length(Literal.create(null, BinaryType)), null, 
create_row(bytes))
+  }
+
+  test("number format") {
+    checkEvaluation(FormatNumber(Literal(4.asInstanceOf[Byte]), Literal(3)), 
"4.000")
+    checkEvaluation(FormatNumber(Literal(4.asInstanceOf[Short]), Literal(3)), 
"4.000")
+    checkEvaluation(FormatNumber(Literal(4.0f), Literal(3)), "4.000")
+    checkEvaluation(FormatNumber(Literal(4), Literal(3)), "4.000")
+    checkEvaluation(FormatNumber(Literal(12831273.23481d), Literal(3)), 
"12,831,273.235")
+    checkEvaluation(FormatNumber(Literal(12831273.83421d), Literal(0)), 
"12,831,274")
+    checkEvaluation(FormatNumber(Literal(123123324123L), Literal(3)), 
"123,123,324,123.000")
+    checkEvaluation(FormatNumber(Literal(123123324123L), Literal(-1)), null)
+    checkEvaluation(
+      FormatNumber(
+        Literal(Decimal(123123324123L) * Decimal(123123.21234d)), Literal(4)),
+      "15,159,339,180,002,773.2778")
+    checkEvaluation(FormatNumber(Literal.create(null, IntegerType), 
Literal(3)), null)
+    checkEvaluation(FormatNumber(Literal.create(null, NullType), Literal(3)), 
null)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringFunctionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringFunctionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringFunctionsSuite.scala
deleted file mode 100644
index 5d7763b..0000000
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringFunctionsSuite.scala
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions
-
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.catalyst.dsl.expressions._
-import org.apache.spark.sql.types._
-
-
-class StringFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
-
-  test("StringComparison") {
-    val row = create_row("abc", null)
-    val c1 = 'a.string.at(0)
-    val c2 = 'a.string.at(1)
-
-    checkEvaluation(c1 contains "b", true, row)
-    checkEvaluation(c1 contains "x", false, row)
-    checkEvaluation(c2 contains "b", null, row)
-    checkEvaluation(c1 contains Literal.create(null, StringType), null, row)
-
-    checkEvaluation(c1 startsWith "a", true, row)
-    checkEvaluation(c1 startsWith "b", false, row)
-    checkEvaluation(c2 startsWith "a", null, row)
-    checkEvaluation(c1 startsWith Literal.create(null, StringType), null, row)
-
-    checkEvaluation(c1 endsWith "c", true, row)
-    checkEvaluation(c1 endsWith "b", false, row)
-    checkEvaluation(c2 endsWith "b", null, row)
-    checkEvaluation(c1 endsWith Literal.create(null, StringType), null, row)
-  }
-
-  test("Substring") {
-    val row = create_row("example", "example".toArray.map(_.toByte))
-
-    val s = 'a.string.at(0)
-
-    // substring from zero position with less-than-full length
-    checkEvaluation(
-      Substring(s, Literal.create(0, IntegerType), Literal.create(2, 
IntegerType)), "ex", row)
-    checkEvaluation(
-      Substring(s, Literal.create(1, IntegerType), Literal.create(2, 
IntegerType)), "ex", row)
-
-    // substring from zero position with full length
-    checkEvaluation(
-      Substring(s, Literal.create(0, IntegerType), Literal.create(7, 
IntegerType)), "example", row)
-    checkEvaluation(
-      Substring(s, Literal.create(1, IntegerType), Literal.create(7, 
IntegerType)), "example", row)
-
-    // substring from zero position with greater-than-full length
-    checkEvaluation(Substring(s, Literal.create(0, IntegerType), 
Literal.create(100, IntegerType)),
-      "example", row)
-    checkEvaluation(Substring(s, Literal.create(1, IntegerType), 
Literal.create(100, IntegerType)),
-      "example", row)
-
-    // substring from nonzero position with less-than-full length
-    checkEvaluation(Substring(s, Literal.create(2, IntegerType), 
Literal.create(2, IntegerType)),
-      "xa", row)
-
-    // substring from nonzero position with full length
-    checkEvaluation(Substring(s, Literal.create(2, IntegerType), 
Literal.create(6, IntegerType)),
-      "xample", row)
-
-    // substring from nonzero position with greater-than-full length
-    checkEvaluation(Substring(s, Literal.create(2, IntegerType), 
Literal.create(100, IntegerType)),
-      "xample", row)
-
-    // zero-length substring (within string bounds)
-    checkEvaluation(Substring(s, Literal.create(0, IntegerType), 
Literal.create(0, IntegerType)),
-      "", row)
-
-    // zero-length substring (beyond string bounds)
-    checkEvaluation(Substring(s, Literal.create(100, IntegerType), 
Literal.create(4, IntegerType)),
-      "", row)
-
-    // substring(null, _, _) -> null
-    checkEvaluation(Substring(s, Literal.create(100, IntegerType), 
Literal.create(4, IntegerType)),
-      null, create_row(null))
-
-    // substring(_, null, _) -> null
-    checkEvaluation(Substring(s, Literal.create(null, IntegerType), 
Literal.create(4, IntegerType)),
-      null, row)
-
-    // substring(_, _, null) -> null
-    checkEvaluation(
-      Substring(s, Literal.create(100, IntegerType), Literal.create(null, 
IntegerType)),
-      null,
-      row)
-
-    // 2-arg substring from zero position
-    checkEvaluation(
-      Substring(s, Literal.create(0, IntegerType), 
Literal.create(Integer.MAX_VALUE, IntegerType)),
-      "example",
-      row)
-    checkEvaluation(
-      Substring(s, Literal.create(1, IntegerType), 
Literal.create(Integer.MAX_VALUE, IntegerType)),
-      "example",
-      row)
-
-    // 2-arg substring from nonzero position
-    checkEvaluation(
-      Substring(s, Literal.create(2, IntegerType), 
Literal.create(Integer.MAX_VALUE, IntegerType)),
-      "xample",
-      row)
-
-    val s_notNull = 'a.string.notNull.at(0)
-
-    assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, 
IntegerType)).nullable
-      === true)
-    assert(
-      Substring(s_notNull, Literal.create(0, IntegerType), Literal.create(2, 
IntegerType)).nullable
-        === false)
-    assert(Substring(s_notNull,
-      Literal.create(null, IntegerType), Literal.create(2, 
IntegerType)).nullable === true)
-    assert(Substring(s_notNull,
-      Literal.create(0, IntegerType), Literal.create(null, 
IntegerType)).nullable === true)
-
-    checkEvaluation(s.substr(0, 2), "ex", row)
-    checkEvaluation(s.substr(0), "example", row)
-    checkEvaluation(s.substring(0, 2), "ex", row)
-    checkEvaluation(s.substring(0), "example", row)
-  }
-
-  test("LIKE literal Regular Expression") {
-    checkEvaluation(Literal.create(null, StringType).like("a"), null)
-    checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, 
StringType)), null)
-    checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, 
StringType)), null)
-    checkEvaluation("abdef" like "abdef", true)
-    checkEvaluation("a_%b" like "a\\__b", true)
-    checkEvaluation("addb" like "a_%b", true)
-    checkEvaluation("addb" like "a\\__b", false)
-    checkEvaluation("addb" like "a%\\%b", false)
-    checkEvaluation("a_%b" like "a%\\%b", true)
-    checkEvaluation("addb" like "a%", true)
-    checkEvaluation("addb" like "**", false)
-    checkEvaluation("abc" like "a%", true)
-    checkEvaluation("abc"  like "b%", false)
-    checkEvaluation("abc"  like "bc%", false)
-    checkEvaluation("a\nb" like "a_b", true)
-    checkEvaluation("ab" like "a%b", true)
-    checkEvaluation("a\nb" like "a%b", true)
-  }
-
-  test("LIKE Non-literal Regular Expression") {
-    val regEx = 'a.string.at(0)
-    checkEvaluation("abcd" like regEx, null, create_row(null))
-    checkEvaluation("abdef" like regEx, true, create_row("abdef"))
-    checkEvaluation("a_%b" like regEx, true, create_row("a\\__b"))
-    checkEvaluation("addb" like regEx, true, create_row("a_%b"))
-    checkEvaluation("addb" like regEx, false, create_row("a\\__b"))
-    checkEvaluation("addb" like regEx, false, create_row("a%\\%b"))
-    checkEvaluation("a_%b" like regEx, true, create_row("a%\\%b"))
-    checkEvaluation("addb" like regEx, true, create_row("a%"))
-    checkEvaluation("addb" like regEx, false, create_row("**"))
-    checkEvaluation("abc" like regEx, true, create_row("a%"))
-    checkEvaluation("abc" like regEx, false, create_row("b%"))
-    checkEvaluation("abc" like regEx, false, create_row("bc%"))
-    checkEvaluation("a\nb" like regEx, true, create_row("a_b"))
-    checkEvaluation("ab" like regEx, true, create_row("a%b"))
-    checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
-
-    checkEvaluation(Literal.create(null, StringType) like regEx, null, 
create_row("bc%"))
-  }
-
-  test("RLIKE literal Regular Expression") {
-    checkEvaluation(Literal.create(null, StringType) rlike "abdef", null)
-    checkEvaluation("abdef" rlike Literal.create(null, StringType), null)
-    checkEvaluation(Literal.create(null, StringType) rlike 
Literal.create(null, StringType), null)
-    checkEvaluation("abdef" rlike "abdef", true)
-    checkEvaluation("abbbbc" rlike "a.*c", true)
-
-    checkEvaluation("fofo" rlike "^fo", true)
-    checkEvaluation("fo\no" rlike "^fo\no$", true)
-    checkEvaluation("Bn" rlike "^Ba*n", true)
-    checkEvaluation("afofo" rlike "fo", true)
-    checkEvaluation("afofo" rlike "^fo", false)
-    checkEvaluation("Baan" rlike "^Ba?n", false)
-    checkEvaluation("axe" rlike "pi|apa", false)
-    checkEvaluation("pip" rlike "^(pi)*$", false)
-
-    checkEvaluation("abc"  rlike "^ab", true)
-    checkEvaluation("abc"  rlike "^bc", false)
-    checkEvaluation("abc"  rlike "^ab", true)
-    checkEvaluation("abc"  rlike "^bc", false)
-
-    intercept[java.util.regex.PatternSyntaxException] {
-      evaluate("abbbbc" rlike "**")
-    }
-  }
-
-  test("RLIKE Non-literal Regular Expression") {
-    val regEx = 'a.string.at(0)
-    checkEvaluation("abdef" rlike regEx, true, create_row("abdef"))
-    checkEvaluation("abbbbc" rlike regEx, true, create_row("a.*c"))
-    checkEvaluation("fofo" rlike regEx, true, create_row("^fo"))
-    checkEvaluation("fo\no" rlike regEx, true, create_row("^fo\no$"))
-    checkEvaluation("Bn" rlike regEx, true, create_row("^Ba*n"))
-
-    intercept[java.util.regex.PatternSyntaxException] {
-      evaluate("abbbbc" rlike regEx, create_row("**"))
-    }
-  }
-
-  test("ascii for string") {
-    val a = 'a.string.at(0)
-    checkEvaluation(Ascii(Literal("efg")), 101, create_row("abdef"))
-    checkEvaluation(Ascii(a), 97, create_row("abdef"))
-    checkEvaluation(Ascii(a), 0, create_row(""))
-    checkEvaluation(Ascii(a), null, create_row(null))
-    checkEvaluation(Ascii(Literal.create(null, StringType)), null, 
create_row("abdef"))
-  }
-
-  test("base64/unbase64 for string") {
-    val a = 'a.string.at(0)
-    val b = 'b.binary.at(0)
-    val bytes = Array[Byte](1, 2, 3, 4)
-
-    checkEvaluation(Base64(Literal(bytes)), "AQIDBA==", create_row("abdef"))
-    checkEvaluation(Base64(UnBase64(Literal("AQIDBA=="))), "AQIDBA==", 
create_row("abdef"))
-    checkEvaluation(Base64(UnBase64(Literal(""))), "", create_row("abdef"))
-    checkEvaluation(Base64(UnBase64(Literal.create(null, StringType))), null, 
create_row("abdef"))
-    checkEvaluation(Base64(UnBase64(a)), "AQIDBA==", create_row("AQIDBA=="))
-
-    checkEvaluation(Base64(b), "AQIDBA==", create_row(bytes))
-    checkEvaluation(Base64(b), "", create_row(Array[Byte]()))
-    checkEvaluation(Base64(b), null, create_row(null))
-    checkEvaluation(Base64(Literal.create(null, StringType)), null, 
create_row("abdef"))
-
-    checkEvaluation(UnBase64(a), null, create_row(null))
-    checkEvaluation(UnBase64(Literal.create(null, StringType)), null, 
create_row("abdef"))
-  }
-
-  test("encode/decode for string") {
-    val a = 'a.string.at(0)
-    val b = 'b.binary.at(0)
-    // scalastyle:off
-    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
-    checkEvaluation(
-      Decode(Encode(Literal("大千世界"), Literal("UTF-16LE")), 
Literal("UTF-16LE")), "大千世界")
-    checkEvaluation(
-      Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", 
create_row("大千世界"))
-    checkEvaluation(
-      Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "", 
create_row(""))
-    // scalastyle:on
-    checkEvaluation(Encode(a, Literal("utf-8")), null, create_row(null))
-    checkEvaluation(Encode(Literal.create(null, StringType), 
Literal("utf-8")), null)
-    checkEvaluation(Encode(a, Literal.create(null, StringType)), null, 
create_row(""))
-
-    checkEvaluation(Decode(b, Literal("utf-8")), null, create_row(null))
-    checkEvaluation(Decode(Literal.create(null, BinaryType), 
Literal("utf-8")), null)
-    checkEvaluation(Decode(b, Literal.create(null, StringType)), null, 
create_row(null))
-  }
-
-  test("Levenshtein distance") {
-    checkEvaluation(Levenshtein(Literal.create(null, StringType), 
Literal("")), null)
-    checkEvaluation(Levenshtein(Literal(""), Literal.create(null, 
StringType)), null)
-    checkEvaluation(Levenshtein(Literal(""), Literal("")), 0)
-    checkEvaluation(Levenshtein(Literal("abc"), Literal("abc")), 0)
-    checkEvaluation(Levenshtein(Literal("kitten"), Literal("sitting")), 3)
-    checkEvaluation(Levenshtein(Literal("frog"), Literal("fog")), 1)
-    // scalastyle:off
-    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
-    checkEvaluation(Levenshtein(Literal("千世"), Literal("fog")), 3)
-    checkEvaluation(Levenshtein(Literal("世界千世"), Literal("大a界b")), 
4)
-    // scalastyle:on
-  }
-
-  test("TRIM/LTRIM/RTRIM") {
-    val s = 'a.string.at(0)
-    checkEvaluation(StringTrim(Literal(" aa  ")), "aa", create_row(" abdef "))
-    checkEvaluation(StringTrim(s), "abdef", create_row(" abdef "))
-
-    checkEvaluation(StringTrimLeft(Literal(" aa  ")), "aa  ", create_row(" abdef "))
-    checkEvaluation(StringTrimLeft(s), "abdef ", create_row(" abdef "))
-
-    checkEvaluation(StringTrimRight(Literal(" aa  ")), " aa", create_row(" abdef "))
-    checkEvaluation(StringTrimRight(s), " abdef", create_row(" abdef "))
-
-    // scalastyle:off
-    // non ascii characters are not allowed in the source code, so we disable the scalastyle.
-    checkEvaluation(StringTrimRight(s), "  花花世界", create_row("  花花世界 "))
-    checkEvaluation(StringTrimLeft(s), "花花世界 ", create_row("  花花世界 "))
-    checkEvaluation(StringTrim(s), "花花世界", create_row("  花花世界 "))
-    // scalastyle:on
-  }
-
-  test("FORMAT") {
-    val f = 'f.string.at(0)
-    val d1 = 'd.int.at(1)
-    val s1 = 's.int.at(2)
-
-    val row1 = create_row("aa%d%s", 12, "cc")
-    val row2 = create_row(null, 12, "cc")
-    checkEvaluation(StringFormat(Literal("aa%d%s"), Literal(123), 
Literal("a")), "aa123a", row1)
-    checkEvaluation(StringFormat(Literal("aa")), "aa", create_row(null))
-    checkEvaluation(StringFormat(Literal("aa%d%s"), Literal(123), 
Literal("a")), "aa123a", row1)
-
-    checkEvaluation(StringFormat(f, d1, s1), "aa12cc", row1)
-    checkEvaluation(StringFormat(f, d1, s1), null, row2)
-  }
-
-  test("INSTR") {
-    val s1 = 'a.string.at(0)
-    val s2 = 'b.string.at(1)
-    val s3 = 'c.string.at(2)
-    val row1 = create_row("aaads", "aa", "zz")
-
-    checkEvaluation(StringInstr(Literal("aaads"), Literal("aa")), 1, row1)
-    checkEvaluation(StringInstr(Literal("aaads"), Literal("de")), 0, row1)
-    checkEvaluation(StringInstr(Literal.create(null, StringType), 
Literal("de")), null, row1)
-    checkEvaluation(StringInstr(Literal("aaads"), Literal.create(null, 
StringType)), null, row1)
-
-    checkEvaluation(StringInstr(s1, s2), 1, row1)
-    checkEvaluation(StringInstr(s1, s3), 0, row1)
-
-    // scalastyle:off
-    // non ascii characters are not allowed in the source code, so we disable the scalastyle.
-    checkEvaluation(StringInstr(s1, s2), 3, create_row("花花世界", 
"世界"))
-    checkEvaluation(StringInstr(s1, s2), 1, create_row("花花世界", "花"))
-    checkEvaluation(StringInstr(s1, s2), 0, create_row("花花世界", "小"))
-    // scalastyle:on
-  }
-
-  test("LOCATE") {
-    val s1 = 'a.string.at(0)
-    val s2 = 'b.string.at(1)
-    val s3 = 'c.string.at(2)
-    val s4 = 'd.int.at(3)
-    val row1 = create_row("aaads", "aa", "zz", 1)
-
-    checkEvaluation(new StringLocate(Literal("aa"), Literal("aaads")), 1, row1)
-    checkEvaluation(StringLocate(Literal("aa"), Literal("aaads"), Literal(1)), 
2, row1)
-    checkEvaluation(StringLocate(Literal("aa"), Literal("aaads"), Literal(2)), 
0, row1)
-    checkEvaluation(new StringLocate(Literal("de"), Literal("aaads")), 0, row1)
-    checkEvaluation(StringLocate(Literal("de"), Literal("aaads"), 1), 0, row1)
-
-    checkEvaluation(new StringLocate(s2, s1), 1, row1)
-    checkEvaluation(StringLocate(s2, s1, s4), 2, row1)
-    checkEvaluation(new StringLocate(s3, s1), 0, row1)
-    checkEvaluation(StringLocate(s3, s1, Literal.create(null, IntegerType)), 
0, row1)
-  }
-
-  test("LPAD/RPAD") {
-    val s1 = 'a.string.at(0)
-    val s2 = 'b.int.at(1)
-    val s3 = 'c.string.at(2)
-    val row1 = create_row("hi", 5, "??")
-    val row2 = create_row("hi", 1, "?")
-    val row3 = create_row(null, 1, "?")
-
-    checkEvaluation(StringLPad(Literal("hi"), Literal(5), Literal("??")), 
"???hi", row1)
-    checkEvaluation(StringLPad(Literal("hi"), Literal(1), Literal("??")), "h", 
row1)
-    checkEvaluation(StringLPad(s1, s2, s3), "???hi", row1)
-    checkEvaluation(StringLPad(s1, s2, s3), "h", row2)
-    checkEvaluation(StringLPad(s1, s2, s3), null, row3)
-
-    checkEvaluation(StringRPad(Literal("hi"), Literal(5), Literal("??")), 
"hi???", row1)
-    checkEvaluation(StringRPad(Literal("hi"), Literal(1), Literal("??")), "h", 
row1)
-    checkEvaluation(StringRPad(s1, s2, s3), "hi???", row1)
-    checkEvaluation(StringRPad(s1, s2, s3), "h", row2)
-    checkEvaluation(StringRPad(s1, s2, s3), null, row3)
-  }
-
-  test("REPEAT") {
-    val s1 = 'a.string.at(0)
-    val s2 = 'b.int.at(1)
-    val row1 = create_row("hi", 2)
-    val row2 = create_row(null, 1)
-
-    checkEvaluation(StringRepeat(Literal("hi"), Literal(2)), "hihi", row1)
-    checkEvaluation(StringRepeat(Literal("hi"), Literal(-1)), "", row1)
-    checkEvaluation(StringRepeat(s1, s2), "hihi", row1)
-    checkEvaluation(StringRepeat(s1, s2), null, row2)
-  }
-
-  test("REVERSE") {
-    val s = 'a.string.at(0)
-    val row1 = create_row("abccc")
-    checkEvaluation(StringReverse(Literal("abccc")), "cccba", row1)
-    checkEvaluation(StringReverse(s), "cccba", row1)
-  }
-
-  test("SPACE") {
-    val s1 = 'b.int.at(0)
-    val row1 = create_row(2)
-    val row2 = create_row(null)
-
-    checkEvaluation(StringSpace(Literal(2)), "  ", row1)
-    checkEvaluation(StringSpace(Literal(-1)), "", row1)
-    checkEvaluation(StringSpace(Literal(0)), "", row1)
-    checkEvaluation(StringSpace(s1), "  ", row1)
-    checkEvaluation(StringSpace(s1), null, row2)
-  }
-
-  test("SPLIT") {
-    val s1 = 'a.string.at(0)
-    val s2 = 'b.string.at(1)
-    val row1 = create_row("aa2bb3cc", "[1-9]+")
-
-    checkEvaluation(
-      StringSplit(Literal("aa2bb3cc"), Literal("[1-9]+")), Seq("aa", "bb", 
"cc"), row1)
-    checkEvaluation(
-      StringSplit(s1, s2), Seq("aa", "bb", "cc"), row1)
-  }
-
-  test("length for string / binary") {
-    val a = 'a.string.at(0)
-    val b = 'b.binary.at(0)
-    val bytes = Array[Byte](1, 2, 3, 1, 2)
-    val string = "abdef"
-
-    // scalastyle:off
-    // non ascii characters are not allowed in the source code, so we disable the scalastyle.
-    checkEvaluation(Length(Literal("a花花c")), 4, create_row(string))
-    // scalastyle:on
-    checkEvaluation(Length(Literal(bytes)), 5, create_row(Array[Byte]()))
-
-    checkEvaluation(Length(a), 5, create_row(string))
-    checkEvaluation(Length(b), 5, create_row(bytes))
-
-    checkEvaluation(Length(a), 0, create_row(""))
-    checkEvaluation(Length(b), 0, create_row(Array[Byte]()))
-
-    checkEvaluation(Length(a), null, create_row(null))
-    checkEvaluation(Length(b), null, create_row(null))
-
-    checkEvaluation(Length(Literal.create(null, StringType)), null, 
create_row(string))
-    checkEvaluation(Length(Literal.create(null, BinaryType)), null, 
create_row(bytes))
-  }
-
-  test("number format") {
-    checkEvaluation(FormatNumber(Literal(4.asInstanceOf[Byte]), Literal(3)), 
"4.000")
-    checkEvaluation(FormatNumber(Literal(4.asInstanceOf[Short]), Literal(3)), 
"4.000")
-    checkEvaluation(FormatNumber(Literal(4.0f), Literal(3)), "4.000")
-    checkEvaluation(FormatNumber(Literal(4), Literal(3)), "4.000")
-    checkEvaluation(FormatNumber(Literal(12831273.23481d), Literal(3)), 
"12,831,273.235")
-    checkEvaluation(FormatNumber(Literal(12831273.83421d), Literal(0)), 
"12,831,274")
-    checkEvaluation(FormatNumber(Literal(123123324123L), Literal(3)), 
"123,123,324,123.000")
-    checkEvaluation(FormatNumber(Literal(123123324123L), Literal(-1)), null)
-    checkEvaluation(
-      FormatNumber(
-        Literal(Decimal(123123324123L) * Decimal(123123.21234d)), Literal(4)),
-      "15,159,339,180,002,773.2778")
-    checkEvaluation(FormatNumber(Literal.create(null, IntegerType), 
Literal(3)), null)
-    checkEvaluation(FormatNumber(Literal.create(null, NullType), Literal(3)), 
null)
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index b56fd9a..c180407 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1711,6 +1711,28 @@ object functions {
   
//////////////////////////////////////////////////////////////////////////////////////////////
 
   /**
+   * Concatenates input strings together into a single string.
+   *
+   * @group string_funcs
+   * @since 1.5.0
+   */
+  @scala.annotation.varargs
+  def concat(exprs: Column*): Column = Concat(exprs.map(_.expr))
+
+  /**
+   * Concatenates input strings together into a single string.
+   *
+   * This is the variant of concat that takes in the column names.
+   *
+   * @group string_funcs
+   * @since 1.5.0
+   */
+  @scala.annotation.varargs
+  def concat(columnName: String, columnNames: String*): Column = {
+    concat((columnName +: columnNames).map(Column.apply): _*)
+  }
+
+  /**
    * Computes the length of a given string / binary value.
    *
    * @group string_funcs

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 6dccdd8..29f1197 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -208,169 +208,6 @@ class DataFrameFunctionsSuite extends QueryTest {
       Row(2743272264L, 2180413220L))
   }
 
-  test("Levenshtein distance") {
-    val df = Seq(("kitten", "sitting"), ("frog", "fog")).toDF("l", "r")
-    checkAnswer(df.select(levenshtein("l", "r")), Seq(Row(3), Row(1)))
-    checkAnswer(df.selectExpr("levenshtein(l, r)"), Seq(Row(3), Row(1)))
-  }
-
-  test("string ascii function") {
-    val df = Seq(("abc", "")).toDF("a", "b")
-    checkAnswer(
-      df.select(ascii($"a"), ascii("b")),
-      Row(97, 0))
-
-    checkAnswer(
-      df.selectExpr("ascii(a)", "ascii(b)"),
-      Row(97, 0))
-  }
-
-  test("string base64/unbase64 function") {
-    val bytes = Array[Byte](1, 2, 3, 4)
-    val df = Seq((bytes, "AQIDBA==")).toDF("a", "b")
-    checkAnswer(
-      df.select(base64("a"), base64($"a"), unbase64("b"), unbase64($"b")),
-      Row("AQIDBA==", "AQIDBA==", bytes, bytes))
-
-    checkAnswer(
-      df.selectExpr("base64(a)", "unbase64(b)"),
-      Row("AQIDBA==", bytes))
-  }
-
-  test("string encode/decode function") {
-    val bytes = Array[Byte](-27, -92, -89, -27, -115, -125, -28, -72, -106, 
-25, -107, -116)
-    // scalastyle:off  
-    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
-    val df = Seq(("大千世界", "utf-8", bytes)).toDF("a", "b", "c")
-    checkAnswer(
-      df.select(
-        encode($"a", "utf-8"),
-        encode("a", "utf-8"),
-        decode($"c", "utf-8"),
-        decode("c", "utf-8")),
-      Row(bytes, bytes, "大千世界", "大千世界"))
-
-    checkAnswer(
-      df.selectExpr("encode(a, 'utf-8')", "decode(c, 'utf-8')"),
-      Row(bytes, "大千世界"))
-    // scalastyle:on
-  }
-
-  test("string trim functions") {
-    val df = Seq(("  example  ", "")).toDF("a", "b")
-
-    checkAnswer(
-      df.select(ltrim($"a"), rtrim($"a"), trim($"a")),
-      Row("example  ", "  example", "example"))
-
-    checkAnswer(
-      df.selectExpr("ltrim(a)", "rtrim(a)", "trim(a)"),
-      Row("example  ", "  example", "example"))
-  }
-
-  test("string formatString function") {
-    val df = Seq(("aa%d%s", 123, "cc")).toDF("a", "b", "c")
-
-    checkAnswer(
-      df.select(formatString($"a", $"b", $"c"), formatString("aa%d%s", "b", 
"c")),
-      Row("aa123cc", "aa123cc"))
-
-    checkAnswer(
-      df.selectExpr("printf(a, b, c)"),
-      Row("aa123cc"))
-  }
-
-  test("string instr function") {
-    val df = Seq(("aaads", "aa", "zz")).toDF("a", "b", "c")
-
-    checkAnswer(
-      df.select(instr($"a", $"b"), instr("a", "b")),
-      Row(1, 1))
-
-    checkAnswer(
-      df.selectExpr("instr(a, b)"),
-      Row(1))
-  }
-
-  test("string locate function") {
-    val df = Seq(("aaads", "aa", "zz", 1)).toDF("a", "b", "c", "d")
-
-    checkAnswer(
-      df.select(
-        locate($"b", $"a"), locate("b", "a"), locate($"b", $"a", 1),
-        locate("b", "a", 1), locate($"b", $"a", $"d"), locate("b", "a", "d")),
-      Row(1, 1, 2, 2, 2, 2))
-
-    checkAnswer(
-      df.selectExpr("locate(b, a)", "locate(b, a, d)"),
-      Row(1, 2))
-  }
-
-  test("string padding functions") {
-    val df = Seq(("hi", 5, "??")).toDF("a", "b", "c")
-
-    checkAnswer(
-          df.select(
-            lpad($"a", $"b", $"c"), rpad("a", "b", "c"),
-            lpad($"a", 1, $"c"), rpad("a", 1, "c")),
-          Row("???hi", "hi???", "h", "h"))
-
-    checkAnswer(
-      df.selectExpr("lpad(a, b, c)", "rpad(a, b, c)", "lpad(a, 1, c)", 
"rpad(a, 1, c)"),
-      Row("???hi", "hi???", "h", "h"))
-  }
-
-  test("string repeat function") {
-    val df = Seq(("hi", 2)).toDF("a", "b")
-
-    checkAnswer(
-      df.select(
-        repeat($"a", 2), repeat("a", 2), repeat($"a", $"b"), repeat("a", "b")),
-      Row("hihi", "hihi", "hihi", "hihi"))
-
-    checkAnswer(
-      df.selectExpr("repeat(a, 2)", "repeat(a, b)"),
-      Row("hihi", "hihi"))
-  }
-
-  test("string reverse function") {
-    val df = Seq(("hi", "hhhi")).toDF("a", "b")
-
-    checkAnswer(
-      df.select(reverse($"a"), reverse("b")),
-      Row("ih", "ihhh"))
-
-    checkAnswer(
-      df.selectExpr("reverse(b)"),
-      Row("ihhh"))
-  }
-
-  test("string space function") {
-    val df = Seq((2, 3)).toDF("a", "b")
-
-    checkAnswer(
-      df.select(space($"a"), space("b")),
-      Row("  ", "   "))
-
-    checkAnswer(
-      df.selectExpr("space(b)"),
-      Row("   "))
-  }
-
-  test("string split function") {
-    val df = Seq(("aa2bb3cc", "[1-9]+")).toDF("a", "b")
-
-    checkAnswer(
-      df.select(
-        split($"a", "[1-9]+"),
-        split("a", "[1-9]+")),
-      Row(Seq("aa", "bb", "cc"), Seq("aa", "bb", "cc")))
-
-    checkAnswer(
-      df.selectExpr("split(a, '[1-9]+')"),
-      Row(Seq("aa", "bb", "cc")))
-  }
-
   test("conditional function: least") {
     checkAnswer(
       testData2.select(least(lit(-1), lit(0), col("a"), col("b"))).limit(1),
@@ -430,83 +267,4 @@ class DataFrameFunctionsSuite extends QueryTest {
     )
   }
 
-  test("string / binary length function") {
-    val df = Seq(("123", Array[Byte](1, 2, 3, 4), 123)).toDF("a", "b", "c")
-    checkAnswer(
-      df.select(length($"a"), length("a"), length($"b"), length("b")),
-      Row(3, 3, 4, 4))
-
-    checkAnswer(
-      df.selectExpr("length(a)", "length(b)"),
-      Row(3, 4))
-
-    intercept[AnalysisException] {
-      checkAnswer(
-        df.selectExpr("length(c)"), // int type of the argument is unacceptable
-        Row("5.0000"))
-    }
-  }
-
-  test("number format function") {
-    val tuple =
-      ("aa", 1.asInstanceOf[Byte], 2.asInstanceOf[Short],
-        3.13223f, 4, 5L, 6.48173d, Decimal(7.128381))
-    val df =
-      Seq(tuple)
-        .toDF(
-          "a", // string "aa"
-          "b", // byte    1
-          "c", // short   2
-          "d", // float   3.13223f
-          "e", // integer 4
-          "f", // long    5L
-          "g", // double  6.48173d
-          "h") // decimal 7.128381
-
-    checkAnswer(
-      df.select(
-        format_number($"f", 4),
-        format_number("f", 4)),
-      Row("5.0000", "5.0000"))
-
-    checkAnswer(
-      df.selectExpr("format_number(b, e)"), // convert the 1st argument to 
integer
-      Row("1.0000"))
-
-    checkAnswer(
-      df.selectExpr("format_number(c, e)"), // convert the 1st argument to 
integer
-      Row("2.0000"))
-
-    checkAnswer(
-      df.selectExpr("format_number(d, e)"), // convert the 1st argument to 
double
-      Row("3.1322"))
-
-    checkAnswer(
-      df.selectExpr("format_number(e, e)"), // not convert anything
-      Row("4.0000"))
-
-    checkAnswer(
-      df.selectExpr("format_number(f, e)"), // not convert anything
-      Row("5.0000"))
-
-    checkAnswer(
-      df.selectExpr("format_number(g, e)"), // not convert anything
-      Row("6.4817"))
-
-    checkAnswer(
-      df.selectExpr("format_number(h, e)"), // not convert anything
-      Row("7.1284"))
-
-    intercept[AnalysisException] {
-      checkAnswer(
-        df.selectExpr("format_number(a, e)"), // string type of the 1st 
argument is unacceptable
-        Row("5.0000"))
-    }
-
-    intercept[AnalysisException] {
-      checkAnswer(
-        df.selectExpr("format_number(e, g)"), // decimal type of the 2nd 
argument is unacceptable
-        Row("5.0000"))
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
new file mode 100644
index 0000000..4eff33e
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.types.Decimal
+
+
+class StringFunctionsSuite extends QueryTest {
+
+  private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
+  import ctx.implicits._
+
+  test("string concat") {
+    val df = Seq[(String, String, String)](("a", "b", null)).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(concat($"a", $"b", $"c")),
+      Row("ab"))
+
+    checkAnswer(
+      df.selectExpr("concat(a, b, c)"),
+      Row("ab"))
+  }
+
+
+  test("string Levenshtein distance") {
+    val df = Seq(("kitten", "sitting"), ("frog", "fog")).toDF("l", "r")
+    checkAnswer(df.select(levenshtein("l", "r")), Seq(Row(3), Row(1)))
+    checkAnswer(df.selectExpr("levenshtein(l, r)"), Seq(Row(3), Row(1)))
+  }
+
+  test("string ascii function") {
+    val df = Seq(("abc", "")).toDF("a", "b")
+    checkAnswer(
+      df.select(ascii($"a"), ascii("b")),
+      Row(97, 0))
+
+    checkAnswer(
+      df.selectExpr("ascii(a)", "ascii(b)"),
+      Row(97, 0))
+  }
+
+  test("string base64/unbase64 function") {
+    val bytes = Array[Byte](1, 2, 3, 4)
+    val df = Seq((bytes, "AQIDBA==")).toDF("a", "b")
+    checkAnswer(
+      df.select(base64("a"), base64($"a"), unbase64("b"), unbase64($"b")),
+      Row("AQIDBA==", "AQIDBA==", bytes, bytes))
+
+    checkAnswer(
+      df.selectExpr("base64(a)", "unbase64(b)"),
+      Row("AQIDBA==", bytes))
+  }
+
+  test("string encode/decode function") {
+    val bytes = Array[Byte](-27, -92, -89, -27, -115, -125, -28, -72, -106, -25, -107, -116)
+    // scalastyle:off
+    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
+    val df = Seq(("大千世界", "utf-8", bytes)).toDF("a", "b", "c")
+    checkAnswer(
+      df.select(
+        encode($"a", "utf-8"),
+        encode("a", "utf-8"),
+        decode($"c", "utf-8"),
+        decode("c", "utf-8")),
+      Row(bytes, bytes, "大千世界", "大千世界"))
+
+    checkAnswer(
+      df.selectExpr("encode(a, 'utf-8')", "decode(c, 'utf-8')"),
+      Row(bytes, "大千世界"))
+    // scalastyle:on
+  }
+
+  test("string trim functions") {
+    val df = Seq(("  example  ", "")).toDF("a", "b")
+
+    checkAnswer(
+      df.select(ltrim($"a"), rtrim($"a"), trim($"a")),
+      Row("example  ", "  example", "example"))
+
+    checkAnswer(
+      df.selectExpr("ltrim(a)", "rtrim(a)", "trim(a)"),
+      Row("example  ", "  example", "example"))
+  }
+
+  test("string formatString function") {
+    val df = Seq(("aa%d%s", 123, "cc")).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(formatString($"a", $"b", $"c"), formatString("aa%d%s", "b", "c")),
+      Row("aa123cc", "aa123cc"))
+
+    checkAnswer(
+      df.selectExpr("printf(a, b, c)"),
+      Row("aa123cc"))
+  }
+
+  test("string instr function") {
+    val df = Seq(("aaads", "aa", "zz")).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(instr($"a", $"b"), instr("a", "b")),
+      Row(1, 1))
+
+    checkAnswer(
+      df.selectExpr("instr(a, b)"),
+      Row(1))
+  }
+
+  test("string locate function") {
+    val df = Seq(("aaads", "aa", "zz", 1)).toDF("a", "b", "c", "d")
+
+    checkAnswer(
+      df.select(
+        locate($"b", $"a"), locate("b", "a"), locate($"b", $"a", 1),
+        locate("b", "a", 1), locate($"b", $"a", $"d"), locate("b", "a", "d")),
+      Row(1, 1, 2, 2, 2, 2))
+
+    checkAnswer(
+      df.selectExpr("locate(b, a)", "locate(b, a, d)"),
+      Row(1, 2))
+  }
+
+  test("string padding functions") {
+    val df = Seq(("hi", 5, "??")).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(
+        lpad($"a", $"b", $"c"), rpad("a", "b", "c"),
+        lpad($"a", 1, $"c"), rpad("a", 1, "c")),
+      Row("???hi", "hi???", "h", "h"))
+
+    checkAnswer(
+      df.selectExpr("lpad(a, b, c)", "rpad(a, b, c)", "lpad(a, 1, c)", "rpad(a, 1, c)"),
+      Row("???hi", "hi???", "h", "h"))
+  }
+
+  test("string repeat function") {
+    val df = Seq(("hi", 2)).toDF("a", "b")
+
+    checkAnswer(
+      df.select(
+        repeat($"a", 2), repeat("a", 2), repeat($"a", $"b"), repeat("a", "b")),
+      Row("hihi", "hihi", "hihi", "hihi"))
+
+    checkAnswer(
+      df.selectExpr("repeat(a, 2)", "repeat(a, b)"),
+      Row("hihi", "hihi"))
+  }
+
+  test("string reverse function") {
+    val df = Seq(("hi", "hhhi")).toDF("a", "b")
+
+    checkAnswer(
+      df.select(reverse($"a"), reverse("b")),
+      Row("ih", "ihhh"))
+
+    checkAnswer(
+      df.selectExpr("reverse(b)"),
+      Row("ihhh"))
+  }
+
+  test("string space function") {
+    val df = Seq((2, 3)).toDF("a", "b")
+
+    checkAnswer(
+      df.select(space($"a"), space("b")),
+      Row("  ", "   "))
+
+    checkAnswer(
+      df.selectExpr("space(b)"),
+      Row("   "))
+  }
+
+  test("string split function") {
+    val df = Seq(("aa2bb3cc", "[1-9]+")).toDF("a", "b")
+
+    checkAnswer(
+      df.select(
+        split($"a", "[1-9]+"),
+        split("a", "[1-9]+")),
+      Row(Seq("aa", "bb", "cc"), Seq("aa", "bb", "cc")))
+
+    checkAnswer(
+      df.selectExpr("split(a, '[1-9]+')"),
+      Row(Seq("aa", "bb", "cc")))
+  }
+
+  test("string / binary length function") {
+    val df = Seq(("123", Array[Byte](1, 2, 3, 4), 123)).toDF("a", "b", "c")
+    checkAnswer(
+      df.select(length($"a"), length("a"), length($"b"), length("b")),
+      Row(3, 3, 4, 4))
+
+    checkAnswer(
+      df.selectExpr("length(a)", "length(b)"),
+      Row(3, 4))
+
+    intercept[AnalysisException] {
+      checkAnswer(
+        df.selectExpr("length(c)"), // int type of the argument is unacceptable
+        Row("5.0000"))
+    }
+  }
+
+  test("number format function") {
+    val tuple =
+      ("aa", 1.asInstanceOf[Byte], 2.asInstanceOf[Short],
+        3.13223f, 4, 5L, 6.48173d, Decimal(7.128381))
+    val df =
+      Seq(tuple)
+        .toDF(
+          "a", // string "aa"
+          "b", // byte    1
+          "c", // short   2
+          "d", // float   3.13223f
+          "e", // integer 4
+          "f", // long    5L
+          "g", // double  6.48173d
+          "h") // decimal 7.128381
+
+    checkAnswer(
+      df.select(
+        format_number($"f", 4),
+        format_number("f", 4)),
+      Row("5.0000", "5.0000"))
+
+    checkAnswer(
+      df.selectExpr("format_number(b, e)"), // convert the 1st argument to integer
+      Row("1.0000"))
+
+    checkAnswer(
+      df.selectExpr("format_number(c, e)"), // convert the 1st argument to integer
+      Row("2.0000"))
+
+    checkAnswer(
+      df.selectExpr("format_number(d, e)"), // convert the 1st argument to double
+      Row("3.1322"))
+
+    checkAnswer(
+      df.selectExpr("format_number(e, e)"), // not convert anything
+      Row("4.0000"))
+
+    checkAnswer(
+      df.selectExpr("format_number(f, e)"), // not convert anything
+      Row("5.0000"))
+
+    checkAnswer(
+      df.selectExpr("format_number(g, e)"), // not convert anything
+      Row("6.4817"))
+
+    checkAnswer(
+      df.selectExpr("format_number(h, e)"), // not convert anything
+      Row("7.1284"))
+
+    intercept[AnalysisException] {
+      checkAnswer(
+        df.selectExpr("format_number(a, e)"), // string type of the 1st argument is unacceptable
+        Row("5.0000"))
+    }
+
+    intercept[AnalysisException] {
+      checkAnswer(
+        df.selectExpr("format_number(e, g)"), // double type of the 2nd argument is unacceptable
+        Row("5.0000"))
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 6b8f2f6..299cc59 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -256,6 +256,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "timestamp_2",
     "timestamp_udf",
 
+    // Hive outputs NULL if any concat input has null. We never output null for concat.
+    "udf_concat",
+
     // Unlike Hive, we do support log base in (0, 1.0], therefore disable this
     "udf7"
   )
@@ -846,7 +849,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_case",
     "udf_ceil",
     "udf_ceiling",
-    "udf_concat",
     "udf_concat_insert1",
     "udf_concat_insert2",
     "udf_concat_ws",

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index e7f9fbb..9723b6e 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -21,6 +21,7 @@ import javax.annotation.Nonnull;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 
+import org.apache.spark.unsafe.PlatformDependent;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 
 import static org.apache.spark.unsafe.PlatformDependent.*;
@@ -322,7 +323,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
       }
       i += numBytesForFirstByte(getByte(i));
       c += 1;
-    } while(i < numBytes);
+    } while (i < numBytes);
 
     return -1;
   }
@@ -395,6 +396,39 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
     }
   }
 
+  /**
+   * Concatenates input strings together into a single string. A null input is skipped.
+   * For example, concat("a", null, "c") would yield "ac".
+   */
+  public static UTF8String concat(UTF8String... inputs) {
+    if (inputs == null) {
+      return fromBytes(new byte[0]);
+    }
+
+    // Compute the total length of the result.
+    int totalLength = 0;
+    for (int i = 0; i < inputs.length; i++) {
+      if (inputs[i] != null) {
+        totalLength += inputs[i].numBytes;
+      }
+    }
+
+    // Allocate a new byte array, and copy the inputs one by one into it.
+    final byte[] result = new byte[totalLength];
+    int offset = 0;
+    for (int i = 0; i < inputs.length; i++) {
+      if (inputs[i] != null) {
+        int len = inputs[i].numBytes;
+        PlatformDependent.copyMemory(
+          inputs[i].base, inputs[i].offset,
+          result, PlatformDependent.BYTE_ARRAY_OFFSET + offset,
+          len);
+        offset += len;
+      }
+    }
+    return fromBytes(result);
+  }
+
   @Override
   public String toString() {
     try {
@@ -413,7 +447,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
   }
 
   @Override
-  public int compareTo(final UTF8String other) {
+  public int compareTo(@Nonnull final UTF8String other) {
     int len = Math.min(numBytes, other.numBytes);
     // TODO: compare 8 bytes as unsigned long
     for (int i = 0; i < len; i ++) {
@@ -434,7 +468,7 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
   public boolean equals(final Object other) {
     if (other instanceof UTF8String) {
       UTF8String o = (UTF8String) other;
-      if (numBytes != o.numBytes){
+      if (numBytes != o.numBytes) {
         return false;
       }
       return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);

http://git-wip-us.apache.org/repos/asf/spark/blob/6e1e2eba/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 694bdc2..0db7522 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -87,6 +87,20 @@ public class UTF8StringSuite {
   }
 
   @Test
+  public void concatTest() {
+    assertEquals(concat(), fromString(""));
+    assertEquals(concat(null), fromString(""));
+    assertEquals(concat(fromString("")), fromString(""));
+    assertEquals(concat(fromString("ab")), fromString("ab"));
+    assertEquals(concat(fromString("a"), fromString("b")), fromString("ab"));
+    assertEquals(concat(fromString("a"), fromString("b"), fromString("c")), fromString("abc"));
+    assertEquals(concat(fromString("a"), null, fromString("c")), fromString("ac"));
+    assertEquals(concat(fromString("a"), null, null), fromString("a"));
+    assertEquals(concat(null, null, null), fromString(""));
+    assertEquals(concat(fromString("数据"), fromString("砖头")), fromString("数据砖头"));
+  }
+
+  @Test
   public void contains() {
     assertTrue(fromString("").contains(fromString("")));
     assertTrue(fromString("hello").contains(fromString("ello")));


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to