[GitHub] spark pull request: [SPARK-7322] [SQL] [WIP] Support Window Functi...

scwf Mon, 18 May 2015 20:29:34 -0700

Github user scwf commented on a diff in the pull request:

    https://github.com/apache/spark/pull/6104#discussion_r30566922
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/WindowFunctionDefinition.scala ---
    @@ -0,0 +1,372 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql
    +
    +import scala.language.implicitConversions
    +
    +import org.apache.spark.annotation.Experimental
    +import org.apache.spark.sql.catalyst.expressions._
    +
    +/**
    + * :: Experimental ::
    + * A set of methods for window function definition for aggregate 
expressions.
    + * For example:
    + * {{{
    + *   // predefine a window
    + *   val w = partitionBy("name").orderBy("id")
    + *
    + *   df.select(
    + *     first("value")
    + *       over(w).as("first_value"),
    + *     last("value")
    + *       over(w).as("last_value"),
    + *     avg("value")
    + *       over(
    + *       partitionBy("k1")
    + *       .orderBy("k2", "k3")
    + *       .rows
    + *       .following(1)).as("avg_value"),
    + *     max("value")
    + *       .over(
    + *       partitionBy("k2")
    + *       .orderBy("k3")
    + *       .range
    + *       .between
    + *       .preceding(4)
    + *       .and
    + *       .following(3)).as("max_value"))
    + *
    + * }}}
    + *
    + */
    +@Experimental
    +class WindowFunctionDefinition {
    +  private var column: Column = _
    +  private var partitionSpec: Seq[Expression] = Nil
    +  private var orderSpec: Seq[SortOrder] = Nil
    +  private var frame: WindowFrame = UnspecifiedFrame
    +
    +  // Hint of when call the methods `.preceding(n)` `.currentRow()` 
`.following()`
    +  // if bindLower == true, then we will set the lower bound, otherwise, we 
should
    +  // set the upper bound for the Row/Range Frame.
    +  private var bindLower: Boolean = true
    +
    +  private def this(
    +      column: Column = null,
    +      partitionSpec: Seq[Expression] = Nil,
    +      orderSpec: Seq[SortOrder] = Nil,
    +      frame: WindowFrame = UnspecifiedFrame,
    +      bindLower: Boolean = true) {
    +    this()
    +    this.column = column
    +    this.partitionSpec = partitionSpec
    +    this.orderSpec = orderSpec
    +    this.frame     = frame
    +    this.bindLower = bindLower
    +  }
    +
    +  private[sql] def newColumn(c: Column): WindowFunctionDefinition = {
    +    new WindowFunctionDefinition(c, partitionSpec, orderSpec, frame, 
bindLower)
    +  }
    +
    +  /**
    +   * Returns a new [[WindowFunctionDefinition]] partitioned by the 
specified column.
    +   * {{{
    +   *   // The following 2 are equivalent
    +   *   df.over(partitionBy("k1", "k2", ...))
    +   *   df.over(partitionBy($"K1", $"k2", ...))
    +   * }}}
    +   * @group window_funcs
    +   */
    +  @scala.annotation.varargs
    +  def partitionBy(colName: String, colNames: String*): 
WindowFunctionDefinition = {
    +    partitionBy((colName +: colNames).map(Column(_)): _*)
    +  }
    +
    +  /**
    +   * Returns a new [[WindowFunctionDefinition]] partitioned by the 
specified column. For example:
    +   * {{{
    +   *   df.over(partitionBy($"col1", $"col2"))
    +   * }}}
    +   * @group window_funcs
    +   */
    +  @scala.annotation.varargs
    +  def partitionBy(cols: Column*): WindowFunctionDefinition = {
    +    new WindowFunctionDefinition(column, cols.map(_.expr), orderSpec, 
frame)
    --- End diff --
    
    How about updating `partitionSpec` and returning `this`? We do not need to 
create a new instance here.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request: [SPARK-7322] [SQL] [WIP] Support Window Functi...

Reply via email to