Here is an example SQL query:

select platformcodetoname(payPlatform) as platform,
       sum(payAmount) as pay_amount,
       cast(tumble_start(rt, interval '5' seconds) as BIGINT) as rowtime
from payment_msg
group by tumble(rt, interval '5' seconds), payPlatform

This query computes the aggregation over each 5-second tumble window.
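For reference, a minimal sketch of running that query from the PyFlink 1.11 Table API. Assumptions: the payment_msg source table, a sink table (here called sink1, an illustrative name), and the platformcodetoname UDF are registered on st_env before the query runs, and rt is declared as the event-time attribute via a WATERMARK clause in the source DDL.

from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import StreamTableEnvironment

s_env = StreamExecutionEnvironment.get_execution_environment()
st_env = StreamTableEnvironment.create(s_env)

# Source/sink DDL and the platformcodetoname UDF registration are elided
# here; they must be registered on st_env before the query below runs.
# The sink table name "sink1" is only illustrative.
result = st_env.sql_query("""
    select platformcodetoname(payPlatform) as platform,
           sum(payAmount) as pay_amount,
           cast(tumble_start(rt, interval '5' seconds) as BIGINT) as rowtime
    from payment_msg
    group by tumble(rt, interval '5' seconds), payPlatform
""")
result.insert_into("sink1")
st_env.execute("tumble_window_query")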
奇怪的不朽琴师 <1129656...@qq.com> wrote on Wed, Jul 15, 2020, 11:10 AM:
> Hi Shuiqiang,
> My goal is to produce an aggregate at a fixed interval, e.g. a summary every two seconds. How should I define the window for this requirement?
>
> ------------------ Original Message ------------------
> From: "user-zh" <acqua....@gmail.com>
> Sent: Wednesday, July 15, 2020, 10:51 AM
> To: "user-zh" <user-zh@flink.apache.org>
> Subject: Re: pyflink1.11.0window
>
> Hi 琴师,
> The exception in the stack trace is org.apache.flink.table.api.ValidationException: A tumble window expects a size value literal.
> It looks like the code that defines the tumble window is not quite right.
>
> Best,
> Shuiqiang
>
> 奇怪的不朽琴师 <1129656...@qq.com> wrote on Wed, Jul 15, 2020, 10:27 AM:
>
> > Hi,
> > I changed the source as you suggested in your reply, but now I get a new error. What is causing it? I have been trying to get a window working without success. Please help, thanks.
> > Traceback (most recent call last):
> >   File "tou.py", line 71, in <module>
> >     from_kafka_to_kafka_demo()
> >   File "tou.py", line 21, in from_kafka_to_kafka_demo
> >     .select(" id,  time1 , time1 ")\
> >   File "/usr/local/lib/python3.7/site-packages/pyflink/table/table.py", line 907, in select
> >     return Table(self._j_table.select(fields), self._t_env)
> >   File "/usr/local/lib/python3.7/site-packages/py4j/java_gateway.py", line 1286, in __call__
> >     answer, self.gateway_client, self.target_id, self.name)
> >   File "/usr/local/lib/python3.7/site-packages/pyflink/util/exceptions.py", line 147, in deco
> >     return f(*a, **kw)
> >   File "/usr/local/lib/python3.7/site-packages/py4j/protocol.py", line 328, in get_return_value
> >     format(target_id, ".", name), value)
> > py4j.protocol.Py4JJavaError: An error occurred while calling o26.select.
> > : org.apache.flink.table.api.ValidationException: A tumble window expects a size value literal.
> >         at org.apache.flink.table.operations.utils.AggregateOperationFactory.getAsValueLiteral(AggregateOperationFactory.java:384)
> >         at org.apache.flink.table.operations.utils.AggregateOperationFactory.validateAndCreateTumbleWindow(AggregateOperationFactory.java:302)
> >         at org.apache.flink.table.operations.utils.AggregateOperationFactory.createResolvedWindow(AggregateOperationFactory.java:236)
> >         at org.apache.flink.table.operations.utils.OperationTreeBuilder.windowAggregate(OperationTreeBuilder.java:250)
> >         at org.apache.flink.table.api.internal.TableImpl$WindowGroupedTableImpl.select(TableImpl.java:794)
> >         at org.apache.flink.table.api.internal.TableImpl$WindowGroupedTableImpl.select(TableImpl.java:781)
> >         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> >         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> >         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> >         at java.lang.reflect.Method.invoke(Method.java:498)
> >         at org.apache.flink.api.python.shaded.py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> >         at org.apache.flink.api.python.shaded.py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> >         at org.apache.flink.api.python.shaded.py4j.Gateway.invoke(Gateway.java:282)
> >         at org.apache.flink.api.python.shaded.py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> >         at org.apache.flink.api.python.shaded.py4j.commands.CallCommand.execute(CallCommand.java:79)
> >         at org.apache.flink.api.python.shaded.py4j.GatewayConnection.run(GatewayConnection.java:238)
> >         at java.lang.Thread.run(Thread.java:748)
> >
> > def register_rides_source(st_env):
> >     source_ddl = \
> >     """
> >     create table source1(
> >      id int,
> >      time1 timestamp,
> >      type string,
> >      WATERMARK FOR time1 as time1 - INTERVAL '2' SECOND
> >      ) with (
> >     'connector.type' = 'kafka',
> >     'update-mode' = 'append',
> >     'connector.topic' = 'tp1',
> >     'connector.properties.bootstrap.servers' = 'localhost:9092',
> >     'connector.properties.zookeeper.connect' = 'localhost:2181',
> >     'format.type' = 'json',
> >     'format.derive-schema' = 'true',
> >     'connector.version' = 'universal'
> >      )
> >     """
> >     st_env.sql_update(source_ddl)
> >
> >     s_env = StreamExecutionEnvironment.get_execution_environment()
> >     s_env.set_parallelism(1)
> >
> >     st_env = StreamTableEnvironment.create(s_env)
> >
> >     register_rides_source(st_env)
> >     register_rides_sink(st_env)
> >     st_env.from_path("source1")\
> >         .window(Tumble.over("2.secends").on("time1").alias("w")) \
> >         .group_by("w") \
> >         .select(" id,  time1 , time1 ")\
> >         .insert_into("sink1")
> >
> >     st_env.execute("2-from_kafka_to_kafka")
> >
> > The code is as above.
> >
> > ------------------ Original Message ------------------
> > From: "user-zh" <acqua....@gmail.com>
> > Sent: Friday, July 10, 2020, 9:17 AM
> > To: "user-zh" <user-zh@flink.apache.org>
> > Subject: Re: pyflink1.11.0window
> >
> > Hi 琴师,
> >
> > Does your source DDL declare time1 as a time attribute?
> > create table source1(
> >         id int,
> >         time1 timestamp,
> >         type string,
> >         WATERMARK FOR time1 as time1 - INTERVAL '2' SECOND
> > ) with (...)
> >
> > 奇怪的不朽琴师 <1129656...@qq.com> wrote on Fri, Jul 10, 2020, 8:43 AM:
> >
> > > ------------------ Original Message ------------------
> > > From: "奇怪的不朽琴师" <1129656...@qq.com>
> > > Sent: Thursday, July 9, 2020, 5:08 PM
> > > To: "godfrey he" <godfre...@gmail.com>
> > > Subject: pyflink1.11.0window
> > >
> > > Hi,
> > > When I use pyflink 1.11, opening a window still raises an error:
> > > org.apache.flink.table.api.ValidationException: A group window expects a
> > > time attribute for grouping in a stream environment.
> > >
> > > Has this issue not been fixed? Or am I using it incorrectly? If so, could you provide a correct example?
> > > The code is below.
> > > Thanks
> > >
> > > def from_kafka_to_kafka_demo():
> > >     s_env = StreamExecutionEnvironment.get_execution_environment()
> > >     s_env.set_parallelism(1)
> > >
> > >     # use blink table planner
> > >     st_env = StreamTableEnvironment.create(s_env)
> > >
> > >     # register source and sink
> > >     register_rides_source(st_env)
> > >     register_rides_sink(st_env)
> > >
> > >     st_env.from_path("source1")\
> > >         .window(Tumble.over("1.secends").on("time1").alias("w")) \
> > >         .group_by("w") \
> > >         .select(" id,  time1 , time1 ")\
> > >         .insert_into("sink1")
> > >
> > >     st_env.execute("2-from_kafka_to_kafka")
> > >
> > >
> > > def register_rides_source(st_env):
> > >     source_ddl = \
> > >     '''
> > >     create table source1(
> > >         id int,
> > >      time1 timestamp,
> > >      type string
> > >      ) with (
> > >     'connector.type' = 'kafka',
> > >     'update-mode' = 'append',
> > >     'connector.topic' = 'tp1',
> > >     'connector.properties.bootstrap.servers' = 'localhost:9092'
> > >      )
> > >     '''
> > >     st_env.sql_update(source_ddl)
> > >
> > >
> > > def register_rides_sink(st_env):
> > >     sink_ddl = \
> > >     '''
> > >     create table sink1(
> > >         id int,
> > >      time1 timestamp,
> > >      time2 timestamp
> > >      ) with (
> > >     'connector.type' = 'kafka',
> > >     'update-mode' = 'append',
> > >     'connector.topic' = 'tp3',
> > >     'connector.properties.bootstrap.servers' = 'localhost:9092'
> > >      )
> > >     '''
> > >     st_env.sql_update(sink_ddl)
> > >
> > >
> > > if __name__ == '__main__':
> > >     from_kafka_to_kafka_demo()
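For completeness, a minimal sketch of the quoted pipeline with the two exceptions above addressed, under a few assumptions I am adding myself: the window size is spelled as a valid literal ("2.seconds" rather than "2.secends", which is what triggers "A tumble window expects a size value literal"), time1 is declared TIMESTAMP(3) with a WATERMARK so it is a proper event-time attribute (otherwise "A group window expects a time attribute for grouping"), and the group_by/select are adjusted (group by w and id, select the window start/end) so the projection is valid for a windowed aggregation. Table, topic, and field names are taken from the quoted code; this is a sketch, not a tested job.

from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import StreamTableEnvironment
from pyflink.table.window import Tumble


def register_rides_source(st_env):
    # Assumption: time1 is TIMESTAMP(3) and carries a WATERMARK so it can
    # serve as the event-time attribute of the tumble window.
    st_env.sql_update("""
        create table source1 (
            id int,
            time1 timestamp(3),
            type string,
            WATERMARK FOR time1 as time1 - INTERVAL '2' SECOND
        ) with (
            'connector.type' = 'kafka',
            'connector.version' = 'universal',
            'connector.topic' = 'tp1',
            'connector.properties.bootstrap.servers' = 'localhost:9092',
            'update-mode' = 'append',
            'format.type' = 'json',
            'format.derive-schema' = 'true'
        )
    """)


def register_rides_sink(st_env):
    st_env.sql_update("""
        create table sink1 (
            id int,
            time1 timestamp(3),
            time2 timestamp(3)
        ) with (
            'connector.type' = 'kafka',
            'connector.version' = 'universal',
            'connector.topic' = 'tp3',
            'connector.properties.bootstrap.servers' = 'localhost:9092',
            'update-mode' = 'append',
            'format.type' = 'json',
            'format.derive-schema' = 'true'
        )
    """)


def from_kafka_to_kafka_demo():
    s_env = StreamExecutionEnvironment.get_execution_environment()
    s_env.set_parallelism(1)
    st_env = StreamTableEnvironment.create(s_env)

    register_rides_source(st_env)
    register_rides_sink(st_env)

    # "2.seconds" is a valid size literal; grouping on "w, id" lets id be
    # selected alongside the window start/end timestamps.
    st_env.from_path("source1") \
        .window(Tumble.over("2.seconds").on("time1").alias("w")) \
        .group_by("w, id") \
        .select("id, w.start, w.end") \
        .insert_into("sink1")

    st_env.execute("2-from_kafka_to_kafka")


if __name__ == '__main__':
    from_kafka_to_kafka_demo()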