[jira] [Commented] (FLINK-4937) Add incremental group window aggregation for streaming Table API

ASF GitHub Bot (JIRA) Mon, 21 Nov 2016 15:22:01 -0800

    [ https://issues.apache.org/jira/browse/FLINK-4937?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15685063#comment-15685063 ]


ASF GitHub Bot commented on FLINK-4937:
---------------------------------------

Github user sunjincheng121 commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2792#discussion_r89012143
  
    --- Diff: flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/plan/nodes/datastream/DataStreamAggregate.scala ---
    @@ -135,50 +128,123 @@ class DataStreamAggregate(
           namedProperties)
     
         val prepareOpName = s"prepare select: ($aggString)"
    -    val mappedInput = inputDS
    -      .map(aggregateResult._1)
    -      .name(prepareOpName)
    -
    -    val groupReduceFunction = aggregateResult._2
    -    val rowTypeInfo = new RowTypeInfo(fieldTypes)
    -
    -    val result = {
    -      // grouped / keyed aggregation
    -      if (groupingKeys.length > 0) {
    -        val aggOpName = s"groupBy: (${groupingToString(inputType, 
grouping)}), " +
    -          s"window: ($window), " +
    -          s"select: ($aggString)"
    -        val aggregateFunction =
    -          createWindowAggregationFunction(window, namedProperties, 
groupReduceFunction)
    -
    -        val keyedStream = mappedInput.keyBy(groupingKeys: _*)
    -
    -        val windowedStream = createKeyedWindowedStream(window, keyedStream)
    -          .asInstanceOf[WindowedStream[Row, Tuple, DataStreamWindow]]
    -
    -        windowedStream
    -          .apply(aggregateFunction)
    -          .returns(rowTypeInfo)
    -          .name(aggOpName)
    -          .asInstanceOf[DataStream[Any]]
    +    val keyedAggOpName = s"groupBy: (${groupingToString(inputType, 
grouping)}), " +
    +      s"window: ($window), " +
    +      s"select: ($aggString)"
    +    val nonKeyedAggOpName = s"window: ($window), select: ($aggString)"
    +
    +    val (aggFieldIndexes, aggregates) =
    +      AggregateUtil.transformToAggregateFunctions(
    +        namedAggregates.map(_.getKey), inputType, grouping.length)
    +
    +    val result: DataStream[Any] = {
    +      // check whether all aggregates support partial aggregate
    +      if (aggregates.forall(_.supportPartial)){
    +        // do Incremental Aggregation
    +        // add grouping fields, position keys in the input, and input type
    +        val (mapFunction,
    +        reduceFunction,
    +        groupingOffsetMapping,
    +        aggOffsetMapping,
    +        intermediateRowArity) = 
AggregateUtil.createOperatorFunctionsForIncrementalAggregates(
    +          namedAggregates,
    +          inputType,
    +          getRowType,
    +          grouping,
    +          aggregates,
    +          aggFieldIndexes)
    +
    +        val mappedInput = inputDS
    +          .map(mapFunction)
    +          .name(prepareOpName)
    +
    +        // grouped / keyed aggregation
    +        if (groupingKeys.length > 0) {
    +
    +          val winFunction =
    +            createWindowIncrementalAggregationFunction(
    +              aggregates,
    +              groupingOffsetMapping,
    +              aggOffsetMapping,
    +              getRowType.getFieldCount,
    +              intermediateRowArity,
    +              window,
    +              namedProperties)
    +
    +          val keyedStream = mappedInput.keyBy(groupingKeys: _*)
    +          val windowedStream = createKeyedWindowedStream(window, 
keyedStream)
    +            .asInstanceOf[WindowedStream[Row, Tuple, DataStreamWindow]]
    +
    +          windowedStream
    +            .apply(reduceFunction, winFunction)
    +            .returns(rowTypeInfo)
    +            .name(keyedAggOpName)
    +            .asInstanceOf[DataStream[Any]]
    +        }
    +        // global / non-keyed aggregation
    +        else {
    +          val winFunction =
    +            createAllWindowIncrementalAggregationFunction(
    +              aggregates,
    +              groupingOffsetMapping,
    +              aggOffsetMapping,
    +              getRowType.getFieldCount,
    +              intermediateRowArity,
    +              window,
    +              namedProperties)
    +
    +          val windowedStream = createNonKeyedWindowedStream(window, 
mappedInput)
    +            .asInstanceOf[AllWindowedStream[Row, DataStreamWindow]]
    +          windowedStream
    +            .apply(reduceFunction, winFunction)
    +            .returns(rowTypeInfo)
    +            .name(nonKeyedAggOpName)
    +            .asInstanceOf[DataStream[Any]]
    +        }
           }
    -      // global / non-keyed aggregation
           else {
    -        val aggOpName = s"window: ($window), select: ($aggString)"
    -        val aggregateFunction =
    -          createAllWindowAggregationFunction(window, namedProperties, 
groupReduceFunction)
    -
    -        val windowedStream = createNonKeyedWindowedStream(window, 
mappedInput)
    -          .asInstanceOf[AllWindowedStream[Row, DataStreamWindow]]
    -
    -        windowedStream
    -          .apply(aggregateFunction)
    -          .returns(rowTypeInfo)
    -          .name(aggOpName)
    -          .asInstanceOf[DataStream[Any]]
    +        // do non-Incremental Aggregation
    +        // add grouping fields, position keys in the input, and input type
    +        val (mapFunction, groupReduceFunction) = 
AggregateUtil.createOperatorFunctionsForAggregates(
    --- End diff --
    
    agree!


> Add incremental group window aggregation for streaming Table API
> ----------------------------------------------------------------
>
>                 Key: FLINK-4937
>                 URL: https://issues.apache.org/jira/browse/FLINK-4937
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Table API & SQL
>    Affects Versions: 1.2.0
>            Reporter: Fabian Hueske
>            Assignee: sunjincheng
>
> Group-window aggregates for streaming tables are currently not done in an 
> incremental fashion. This means that the window collects all records and 
> performs the aggregation when the window is closed instead of eagerly 
> updating a partial aggregate for every added record. Since records are 
> buffered, non-incremental aggregation requires more storage space than 
> incremental aggregation.
> The DataStream API, which is used under the hood of the streaming Table API, 
> features [incremental 
> aggregation|https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/windows.html#windowfunction-with-incremental-aggregation]
>  using a {{ReduceFunction}}.
> We should add support for incremental aggregation in group-windows.
> This is a follow-up task of FLINK-4691.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

[jira] [Commented] (FLINK-4937) Add incremental group window aggregation for streaming Table API

Reply via email to