[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/spark/pull/20015


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157925994
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,181 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
+  val instant: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   * @param input internalRow (time)
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc function: (time, level) => time
+   */
+  protected def evalHelper(input: InternalRow, maxLevel: Int)(
+truncFunc: (Any, Int) => Any): Any = {
 val level = if (format.foldable) {
   truncLevel
 } else {
   DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 }
-if (level == -1) {
-  // unknown format
+if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) {
+  // unknown format or too large level
   null
 } else {
-  val d = date.eval(input)
-  if (d == null) {
+  val t = instant.eval(input)
+  if (t == null) {
 null
   } else {
-DateTimeUtils.truncDate(d.asInstanceOf[Int], level)
+truncFunc(t, level)
   }
 }
   }
 
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+  protected def codeGenHelper(
+  ctx: CodegenContext,
+  ev: ExprCode,
+  maxLevel: Int,
+  orderReversed: Boolean = false)(
+  truncFunc: (String, String) => String)
+: ExprCode = {
 val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
 
 if (format.foldable) {
-  if (truncLevel == -1) {
+  if (truncLevel == DateTimeUtils.TRUNC_INVALID || truncLevel > 
maxLevel) {
 ev.copy(code = s"""
   boolean ${ev.isNull} = true;
   ${ctx.javaType(dataType)} ${ev.value} = 
${ctx.defaultValue(dataType)};""")
   } else {
-val d = date.genCode(ctx)
+val t = instant.genCode(ctx)
+val truncFuncStr = truncFunc(t.value, truncLevel.toString)
 ev.copy(code = s"""
-  ${d.code}
-  boolean ${ev.isNull} = ${d.isNull};
+  ${t.code}
+  boolean ${ev.isNull} = ${t.isNull};
   ${ctx.javaType(dataType)} ${ev.value} = 
${ctx.defaultValue(dataType)};
   if (!${ev.isNull}) {
-${ev.value} = $dtu.truncDate(${d.value}, $truncLevel);
+${ev.value} = $dtu.$truncFuncStr;
   }""")
   }
 } else {
-  nullSafeCodeGen(ctx, ev, (dateVal, fmt) => {
+  nullSafeCodeGen(ctx, ev, (left, right) => {
 val form = ctx.freshName("form")
+val (dateVal, fmt) = if (orderReversed) {
+  (right, left)
+} else {
+  (left, right)
+}
+val truncFuncStr = truncFunc(dateVal, form)
 s"""
   int $form = $dtu.parseTruncLevel($fmt);
-  if ($form == -1) {
+  if ($form == -1 || $form > $maxLevel) {
 ${ev.isNull} = true;
   } else {
-${ev.value} = $dtu.truncDate($dateVal, $form);
+${ev.value} = $dtu.$truncFuncStr
   }
 """
   })
 }
   }
 }
 
+/**
+ * Returns date truncated to the 

[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157926202
  
--- Diff: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
 ---
@@ -563,6 +563,76 @@ class DateTimeUtilsSuite extends SparkFunSuite {
 }
   }
 
+  test("truncTimestamp") {
+def test(
--- End diff --

`test` -> `testTrunc` ?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157926055
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -,6 +,24 @@ def trunc(date, format):
 return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
 
 
+@since(2.3)
+def date_trunc(format, timestamp):
+"""
+Returns timestamp truncated to the unit specified by the format.
+
+:param format: 'year', '', 'yy', 'month', 'mon', 'mm',
+'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter'
+
+>>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['d'])
--- End diff --

`d` -> `t` or `ts`.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157908622
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,183 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input internalRow (time)
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc function: (time, level) => time
+   * @return
--- End diff --

Remove `@return`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157908653
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,183 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
--- End diff --

Remove this line.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157896273
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 ---
@@ -964,7 +981,62 @@ object DateTimeUtils {
   }
 
   /**
-   * Returns the truncate level, could be TRUNC_YEAR, TRUNC_MONTH, or 
TRUNC_INVALID,
+   * Returns the trunc date time from original date time and trunc level.
+   * Trunc level should be generated using `parseTruncLevel()`, should be 
between 1 and 8
+   */
+  def truncTimestamp(d: SQLTimestamp, level: Int, timeZone: TimeZone): 
SQLTimestamp = {
--- End diff --

nit: d -> ts or t


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157905770
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,183 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
--- End diff --

Maybe, `time` -> `instant`.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157902164
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -,6 +,24 @@ def trunc(date, format):
 return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
 
 
+@since(2.3)
+def date_trunc(format, timestamp):
+"""
+Returns timestamp truncated to the unit specified by the format.
+
+:param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm',
--- End diff --

Also update the original `trunc`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157902098
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -,6 +,24 @@ def trunc(date, format):
 return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
 
 
+@since(2.3)
+def date_trunc(format, timestamp):
+"""
+Returns timestamp truncated to the unit specified by the format.
+
+:param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm',
--- End diff --

Nit: `YYYY` -> `yyyy`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157821540
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc
+   * @tparam T
+   * @return
+   */
+  protected def evalHelper[T](input: InternalRow, maxLevel: Int)(
+truncFunc: (Any, Int) => T): Any = {
 val level = if (format.foldable) {
   truncLevel
 } else {
   DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 }
-if (level == -1) {
+if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) {
   // unknown format
   null
 } else {
-  val d = date.eval(input)
+  val d = time.eval(input)
   if (d == null) {
 null
   } else {
-DateTimeUtils.truncDate(d.asInstanceOf[Int], level)
+truncFunc(d, level)
   }
 }
   }
 
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+  protected def codeGenHelper[T](
+  ctx: CodegenContext,
+  ev: ExprCode,
+  maxLevel: Int,
+  orderReversed: Boolean = false)(
+  truncFunc: (String, String) => String)
+: ExprCode = {
 val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
 
 if (format.foldable) {
-  if (truncLevel == -1) {
+  if (truncLevel == DateTimeUtils.TRUNC_INVALID || truncLevel > 
maxLevel) {
 ev.copy(code = s"""
   boolean ${ev.isNull} = true;
   ${ctx.javaType(dataType)} ${ev.value} = 
${ctx.defaultValue(dataType)};""")
   } else {
-val d = date.genCode(ctx)
+val d = time.genCode(ctx)
+val truncFuncStr = truncFunc(d.value, truncLevel.toString)
 ev.copy(code = s"""
   ${d.code}
   boolean ${ev.isNull} = ${d.isNull};
   ${ctx.javaType(dataType)} ${ev.value} = 
${ctx.defaultValue(dataType)};
   if (!${ev.isNull}) {
-${ev.value} = $dtu.truncDate(${d.value}, $truncLevel);
+${ev.value} = $dtu.$truncFuncStr;
   }""")
   }
 } else {
-  nullSafeCodeGen(ctx, ev, (dateVal, fmt) => {
+  nullSafeCodeGen(ctx, ev, (left, right) => {
 val form = ctx.freshName("form")
+val (dateVal, fmt) = if (orderReversed) {
+  (right, left)
+} else {
+  (left, right)
+}
+val truncFuncStr = truncFunc(dateVal, form)
 s"""
   int $form = $dtu.parseTruncLevel($fmt);
-  if ($form == -1) {
+  if ($form == -1 || $form > $maxLevel) {
 ${ev.isNull} = true;
   } else {
-${ev.value} = $dtu.truncDate($dateVal, $form);
+${ev.value} = $dtu.$truncFuncStr
   }
 """
   })
 }
   }
 }
 
+/**
+ * Returns date truncated to the unit specified by the format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(date, fmt) - Returns 

[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-19 Thread gczsjdy
Github user gczsjdy commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157686437
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc
+   * @tparam T
+   * @return
+   */
+  protected def evalHelper[T](input: InternalRow, maxLevel: Int)(
+truncFunc: (Any, Int) => T): Any = {
--- End diff --

Maybe `truncFunc: (Any, Int) => Any` is enough? Then we would not need the 
type parameter `T`, but I'm not sure whether that is better...


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread gczsjdy
Github user gczsjdy commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157676669
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc
+   * @tparam T
+   * @return
+   */
+  protected def evalHelper[T](input: InternalRow, maxLevel: Int)(
+truncFunc: (Any, Int) => T): Any = {
 val level = if (format.foldable) {
   truncLevel
 } else {
   DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 }
-if (level == -1) {
+if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) {
--- End diff --

`// unknown format or too small level`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread gczsjdy
Github user gczsjdy commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157678588
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc
+   * @tparam T
+   * @return
+   */
+  protected def evalHelper[T](input: InternalRow, maxLevel: Int)(
+truncFunc: (Any, Int) => T): Any = {
 val level = if (format.foldable) {
   truncLevel
 } else {
   DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 }
-if (level == -1) {
+if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) {
   // unknown format
   null
 } else {
-  val d = date.eval(input)
+  val d = time.eval(input)
   if (d == null) {
 null
   } else {
-DateTimeUtils.truncDate(d.asInstanceOf[Int], level)
+truncFunc(d, level)
   }
 }
   }
 
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+  protected def codeGenHelper[T](
--- End diff --

Why do we need a type parameter `T`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread gczsjdy
Github user gczsjdy commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157680290
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 ---
@@ -944,9 +954,16 @@ object DateTimeUtils {
 date + daysToMonthEnd
   }
 
-  private val TRUNC_TO_YEAR = 1
-  private val TRUNC_TO_MONTH = 2
-  private val TRUNC_INVALID = -1
+  // Visible for testing.
+  val TRUNC_TO_YEAR = 1
+  val TRUNC_TO_MONTH = 2
+  val TRUNC_TO_DAY = 3
+  val TRUNC_TO_HOUR = 4
+  val TRUNC_TO_MINUTE = 5
+  val TRUNC_TO_SECOND = 6
+  val TRUNC_TO_WEEK = 7
+  val TRUNC_TO_QUARTER = 8
+  val TRUNC_INVALID = -1
--- End diff --

Can we bring quarter and week forward, maybe to 3 and 4? Then the constants 
conform better to the order of time granularity, and the max-level design is not affected.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread gczsjdy
Github user gczsjdy commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157674840
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input
+   * @param maxLevel Maximum level that can be used for truncation (e.g 
MONTH for Date input)
+   * @param truncFunc
+   * @tparam T
+   * @return
+   */
+  protected def evalHelper[T](input: InternalRow, maxLevel: Int)(
+truncFunc: (Any, Int) => T): Any = {
 val level = if (format.foldable) {
   truncLevel
 } else {
   DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 }
-if (level == -1) {
+if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) {
   // unknown format
   null
 } else {
-  val d = date.eval(input)
+  val d = time.eval(input)
--- End diff --

nit: Since this is a time, it can be `val t = ...`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157673626
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
+  val time: Expression
+  val format: Expression
   override def nullable: Boolean = true
-  override def prettyName: String = "trunc"
 
   private lazy val truncLevel: Int =
 DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
 
-  override def eval(input: InternalRow): Any = {
+  /**
+   *
+   * @param input
--- End diff --

It seems the descriptions of `input` and `truncFunc` are missing.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157677311
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -,6 +,24 @@ def trunc(date, format):
 return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
 
 
+@since(2.3)
+def date_trunc(format, timestamp):
+"""
+Returns timestamp truncated to the unit specified by the format.
+
+:param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm',
+'DAY', 'DD', 'HOUR', 'MINUTE', 'SECOND', 'WEEK', 'QUARTER'
--- End diff --

Could we make those lowercased too?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157677136
  
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---
@@ -2797,6 +2797,21 @@ object functions {
 TruncDate(date.expr, Literal(format))
   }
 
+  /**
+   * Returns timestamp truncated to the unit specified by the format.
+   *
+   * @param format: 'year', 'yyyy', 'yy' for truncate by year,
+   *   'month', 'mon', 'mm' for truncate by month,
--- End diff --

nit: one more space on each of these lines.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157677400
  
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---
@@ -2797,6 +2797,21 @@ object functions {
 TruncDate(date.expr, Literal(format))
   }
 
+  /**
+   * Returns timestamp truncated to the unit specified by the format.
+   *
+   * @param format: 'year', 'yyyy', 'yy' for truncate by year,
+   *   'month', 'mon', 'mm' for truncate by month,
+   *   'day', 'dd' for truncate by day,
+   *   Other options are: second, minute, hour, week, month, 
quarter
--- End diff --

Maybe, `'second', 'minute', 'hour', 'week', 'month' and 'quarter'`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157673559
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -,6 +,24 @@ def trunc(date, format):
 return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
 
 
+@since(2.3)
+def date_trunc(format, timestamp):
+"""
+Returns timestamp truncated to the unit specified by the format.
+
+:param format: 'year', 'yyyy', 'yy', 'month', 'mon', 'mm',
+'DAY', 'DD', 'HOUR', 'MINUTE', 'SECOND', 'WEEK', 'QUARTER'
+
+>>> df = spark.createDataFrame([('1997-02-28',)], ['d'])
--- End diff --

Can we use a timestamp string like `1997-02-28 05:02:11` to show the 
difference from `trunc` a bit more clearly?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...

2017-12-18 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/20015#discussion_r157675835
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 ---
@@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, 
format: Option[Expression], child:
   override def dataType: DataType = TimestampType
 }
 
-/**
- * Returns date truncated to the unit specified by the format.
- */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the 
day truncated to the unit specified by the format model `fmt`.",
-  examples = """
-Examples:
-  > SELECT _FUNC_('2009-02-12', 'MM');
-   2009-02-01
-  > SELECT _FUNC_('2015-10-27', 'YEAR');
-   2015-01-01
-  """,
-  since = "1.5.0")
-// scalastyle:on line.size.limit
-case class TruncDate(date: Expression, format: Expression)
-  extends BinaryExpression with ImplicitCastInputTypes {
-  override def left: Expression = date
-  override def right: Expression = format
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, 
StringType)
-  override def dataType: DataType = DateType
+trait TruncTime extends BinaryExpression with ImplicitCastInputTypes {
--- End diff --

Maybe `TruncInstant`? I received this advice before and I liked it too. Not 
a big deal though.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org