[ https://issues.apache.org/jira/browse/FLINK-6232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16014250#comment-16014250 ]

ASF GitHub Bot commented on FLINK-6232:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/3715#discussion_r116792670
  
    --- Diff: flink-libraries/flink-table/src/main/scala/org/apache/flink/table/runtime/join/JoinUtil.scala ---
    @@ -0,0 +1,468 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.flink.table.runtime.join
    +
    +import java.math.{BigDecimal => JBigDecimal}
    +import java.util
    +import java.util.EnumSet
    +
    +import org.apache.calcite.avatica.util.TimeUnit
    +import org.apache.calcite.rel.`type`.RelDataType
    +import org.apache.calcite.rel.core.JoinRelType
    +import org.apache.calcite.rex._
    +import org.apache.calcite.sql.fun.SqlStdOperatorTable
    +import org.apache.calcite.sql.parser.SqlParserPos
    +import org.apache.calcite.sql.{SqlIntervalQualifier, SqlKind}
    +import org.apache.flink.api.common.functions.{FilterFunction, FlatJoinFunction}
    +import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation}
    +import org.apache.flink.table.api.{TableConfig, TableException}
    +import org.apache.flink.table.calcite.FlinkTypeFactory
    +import org.apache.flink.table.codegen.{CodeGenException, CodeGenerator, ExpressionReducer}
    +import org.apache.flink.table.plan.nodes.logical.FlinkLogicalJoin
    +import org.apache.flink.table.plan.schema.RowSchema
    +import org.apache.flink.types.Row
    +
    +import scala.collection.JavaConversions._
    +import scala.collection.mutable.ArrayBuffer
    +
    +
    +object JoinUtil {
    +
    +  /**
    +    * Analyzes the time condition to determine the time boundary for each stream
    +    * and the time type, and returns the remaining condition without the time condition.
    +    *
    +    * @param  condition             join condition that includes the time condition
    +    * @param  leftLogicalFieldCnt   field count of the left stream's logical type
    +    * @param  leftPhysicalFieldCnt  field count of the left stream's physical type
    +    * @param  inputType             row type of the combined left and right inputs
    +    * @param  rexBuilder            util to build RexNodes
    +    * @param  config                table environment config
    +    */
    +  private[flink] def analyzeTimeBoundary(
    +      condition: RexNode,
    +      leftLogicalFieldCnt: Int,
    +      leftPhysicalFieldCnt: Int,
    +      inputType: RelDataType,
    +      rexBuilder: RexBuilder,
    +      config: TableConfig): (RelDataType, Long, Long, RexNode) = {
    +    // analyze the time condition to get the greater and less conditions,
    +    // make sure the left stream's field is on the left side of the condition,
    +    // e.g. b.proctime > a.proctime - 1 will be translated to a.proctime - 1 < b.proctime
    +    val greateConditions = new util.ArrayList[TimeSingleCondition]()
    +    val lessConditions = new util.ArrayList[TimeSingleCondition]()
    +    analyzeTimeCondition(condition, greateConditions,
    +      lessConditions, leftLogicalFieldCnt, inputType)
    +    if (greateConditions.size != lessConditions.size
    +        || greateConditions.size > 1
    +        || greateConditions.size == 0) {
    +      throw TableException(
    +        "Equality join time conditon should have proctime or rowtime 
indicator."
    +      )
    +    }
    +
    +    val greatCond = greateConditions.get(0)
    +    val lessCond = lessConditions.get(0)
    +    if (greatCond.timeType != lessCond.timeType) {
    +      throw TableException(
    +        "Equality join time conditon should all use proctime or all use 
rowtime."
    +      )
    +    }
    +
    +    var leftStreamWindowSize: Long = 0
    +    var rightStreamWindowSize: Long = 0
    +
    +    // only a.proctime > b.proctime - INTERVAL '1' HOUR requires storing the a stream
    +    val timeLiteral: RexLiteral =
    +        reduceTimeExpression(greatCond.rightExpr, greatCond.leftExpr, rexBuilder, config)
    +    leftStreamWindowSize = timeLiteral.getValue2.asInstanceOf[Long]
    +    // only need to store past records
    +    if (leftStreamWindowSize < 0) {
    +      leftStreamWindowSize = -leftStreamWindowSize
    +      if (!greatCond.isEqual) {
    +        leftStreamWindowSize -= 1
    +      }
    +    } else {
    +      leftStreamWindowSize = 0
    +    }
    +
    +    // only a.proctime < b.proctime + INTERVAL '1' HOUR requires storing the b stream
    +    val timeLiteral2: RexLiteral =
    +        reduceTimeExpression(lessCond.leftExpr, lessCond.rightExpr, rexBuilder, config)
    +    rightStreamWindowSize = timeLiteral2.getValue2.asInstanceOf[Long]
    +    // only need to store past records
    +    if (rightStreamWindowSize < 0) {
    +      rightStreamWindowSize = -rightStreamWindowSize
    +      if (!lessCond.isEqual) {
    +        rightStreamWindowSize -= 1
    +      }
    +    } else {
    +      rightStreamWindowSize = 0
    +    }
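    +
    +    // Example: assuming reduceTimeExpression returns the reduced bound difference
    +    // in milliseconds, a condition like
    +    //   a.proctime BETWEEN b.proctime - INTERVAL '1' HOUR AND b.proctime + INTERVAL '1' HOUR
    +    // would yield -3600000 for both bounds, so leftStreamWindowSize and
    +    // rightStreamWindowSize both become 3600000; with strict comparisons (< / >)
    +    // they would each be 3599999 instead.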
    +
    +    // get condition without time-condition
    +    // e.g. a.price > b.price and a.proctime between b.proctime and b.proctime + interval '1' hour
    +    // will return a.price > b.price and true and true
    +    var conditionWithoutTime = removeTimeCondition(
    +      condition,
    +      greatCond.originCall,
    +      lessCond.originCall,
    +      rexBuilder,
    +      leftLogicalFieldCnt,
    +      leftPhysicalFieldCnt)
    +
    +    // reduce the expression
    +    // true and true => true, otherwise keep the original expression
    +    try {
    +      val exprReducer = new ExpressionReducer(config)
    +      val originList = new util.ArrayList[RexNode]()
    +      originList.add(conditionWithoutTime)
    +      val reduceList = new util.ArrayList[RexNode]()
    +      exprReducer.reduce(rexBuilder, originList, reduceList)
    +      conditionWithoutTime = reduceList.get(0)
    +    } catch {
    +      case _ : CodeGenException => // ignore
    +    }
    +
    +    (greatCond.timeType, leftStreamWindowSize, rightStreamWindowSize, conditionWithoutTime)
    +  }
    +
    +  /**
    +    * Generates a join function that evaluates the remaining non-equi condition.
    +    *
    +    * @param  config           table environment config
    +    * @param  joinType         join type, used to determine whether the inputs can be null
    +    * @param  leftType         left stream type
    +    * @param  rightType        right stream type
    +    * @param  returnType       return type
    +    * @param  otherCondition   non-equi join condition
    +    * @param  ruleDescription  rule description
    +    */
    +  private[flink] def generateJoinFunction(
    +    config: TableConfig,
    +    joinType: JoinRelType,
    +    leftType: TypeInformation[Row],
    +    rightType: TypeInformation[Row],
    +    returnType: RowSchema,
    +    otherCondition: RexNode,
    +    ruleDescription: String) = {
    +
    +    // whether input can be null
    +    val nullCheck = joinType match {
    +      case JoinRelType.INNER => false
    +      case JoinRelType.LEFT  => true
    +      case JoinRelType.RIGHT => true
    +      case JoinRelType.FULL  => true
    +    }
    +
    +    // generate the function code for the non-equi condition
    +    val generator = new CodeGenerator(
    +      config,
    +      nullCheck,
    +      leftType,
    +      Some(rightType))
    +
    +    val conversion = generator.generateConverterResultExpression(
    +      returnType.physicalTypeInfo,
    +      returnType.physicalType.getFieldNames)
    +
    +    // if the other condition is the literal TRUE, output the result directly
    +    val body = if (otherCondition.isAlwaysTrue) {
    +      s"""
    +         |${conversion.code}
    +         |${generator.collectorTerm}.collect(${conversion.resultTerm});
    +         |""".stripMargin
    +    } else {
    +      val condition = generator.generateExpression(otherCondition)
    +      s"""
    +         |${condition.code}
    +         |if (${condition.resultTerm}) {
    +         |  ${conversion.code}
    +         |  ${generator.collectorTerm}.collect(${conversion.resultTerm});
    +         |}
    +         |""".stripMargin
    +    }
    +
    +    val genFunction = generator.generateFunction(
    +      ruleDescription,
    +      classOf[FlatJoinFunction[Row, Row, Row]],
    +      body,
    +      returnType.physicalTypeInfo)
    +
    +    genFunction
    +  }
    +
    +  private case class TimeSingleCondition(
    +      timeType: RelDataType,
    +      leftExpr: RexNode,
    +      rightExpr: RexNode,
    +      isEqual: Boolean,
    +      originCall: RexNode)
    +
    +  val COMPARISON: util.Set[SqlKind] = EnumSet.of(
    +    SqlKind.LESS_THAN,
    +    SqlKind.GREATER_THAN,
    +    SqlKind.GREATER_THAN_OR_EQUAL,
    +    SqlKind.LESS_THAN_OR_EQUAL)
    +
    +  val EQUI_COMPARISON: util.Set[SqlKind] = EnumSet.of(
    +    SqlKind.GREATER_THAN_OR_EQUAL,
    +    SqlKind.LESS_THAN_OR_EQUAL)
    +
    +  val LESS_COMPARISON: util.Set[SqlKind] = EnumSet.of(
    +    SqlKind.LESS_THAN,
    +    SqlKind.LESS_THAN_OR_EQUAL)
    +
    +  val GREAT_COMPARISON: util.Set[SqlKind] = EnumSet.of(
    +    SqlKind.GREATER_THAN,
    +    SqlKind.GREATER_THAN_OR_EQUAL)
    +
    +  /**
    +    * Analyzes the time condition and splits it into greater-than and less-than conditions.
    +    */
    +  private def analyzeTimeCondition(
    --- End diff --
    
    can be defined as an internal method in `analyzeTimeBoundary`
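    
    A minimal sketch of how that could look (the signature is taken from the diff
    above; the nested method's parameter list and body are only placeholders, since
    its full definition is not shown here). As a nested method it could also capture
    `leftLogicalFieldCnt` and `inputType` from the enclosing scope:
    
        private[flink] def analyzeTimeBoundary(
            condition: RexNode,
            leftLogicalFieldCnt: Int,
            leftPhysicalFieldCnt: Int,
            inputType: RelDataType,
            rexBuilder: RexBuilder,
            config: TableConfig): (RelDataType, Long, Long, RexNode) = {
    
          // nested helper, visible only inside analyzeTimeBoundary
          def analyzeTimeCondition(
              condition: RexNode,
              greaterConditions: util.List[TimeSingleCondition],
              lessConditions: util.List[TimeSingleCondition]): Unit = {
            // ... same traversal logic as the top-level analyzeTimeCondition in this diff ...
          }
    
          // ... rest of analyzeTimeBoundary unchanged ...
        }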


> Support proctime inner equi-join between two streams in the SQL API
> -------------------------------------------------------------------
>
>                 Key: FLINK-6232
>                 URL: https://issues.apache.org/jira/browse/FLINK-6232
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Table API & SQL
>            Reporter: hongyuhong
>            Assignee: hongyuhong
>
> The goal of this issue is to add support for inner equi-join on proc time 
> streams to the SQL interface.
> Queries similar to the following should be supported:
> {code}
> SELECT o.proctime, o.productId, o.orderId, s.proctime AS shipTime 
> FROM Orders AS o 
> JOIN Shipments AS s 
> ON o.orderId = s.orderId 
> AND o.proctime BETWEEN s.proctime AND s.proctime + INTERVAL '1' HOUR;
> {code}
> The following restrictions should initially apply:
> * The join hint only supports inner joins.
> * The ON clause should include an equi-join condition.
> * The time condition {{o.proctime BETWEEN s.proctime AND s.proctime + INTERVAL 
> '1' HOUR}} may only use proctime, which is a system attribute. Only bounded 
> time ranges like {{o.proctime BETWEEN s.proctime - INTERVAL '1' HOUR AND 
> s.proctime + INTERVAL '1' HOUR}} are supported; unbounded conditions like 
> {{o.proctime > s.proctime}} are not. The condition must include the proctime 
> attributes of both streams, so {{o.proctime BETWEEN proctime() AND proctime() 
> + 1}} should also not be supported.
> This issue includes:
> * Design of the DataStream operator to deal with the stream join
> * Translation from Calcite's RelNode representation (LogicalJoin). 



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
