[scala] [streaming] WindowJoin scala example added
Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/c6b90eed Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/c6b90eed Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/c6b90eed Branch: refs/heads/release-0.8 Commit: c6b90eed5e2a3e1ce51bdca436a2bebc06a87ca7 Parents: 1413484 Author: Gyula Fora <[email protected]> Authored: Sun Dec 21 13:10:34 2014 +0100 Committer: mbalassi <[email protected]> Committed: Mon Jan 5 17:59:57 2015 +0100 ---------------------------------------------------------------------- .../scala/streaming/windowing/WindowJoin.scala | 72 ++++++++++++++++++++ .../streaming/StreamExecutionEnvironment.scala | 16 +++++ 2 files changed, 88 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/c6b90eed/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/streaming/windowing/WindowJoin.scala ---------------------------------------------------------------------- diff --git a/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/streaming/windowing/WindowJoin.scala b/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/streaming/windowing/WindowJoin.scala new file mode 100644 index 0000000..eea76c1 --- /dev/null +++ b/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/streaming/windowing/WindowJoin.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.examples.scala.streaming.windowing + +import org.apache.flink.api.scala._ +import org.apache.flink.api.scala.streaming.StreamExecutionEnvironment +import org.apache.flink.streaming.api.function.source.SourceFunction +import org.apache.flink.util.Collector +import scala.util.Random + +object WindowJoin { + + case class Name(id: Long, name: String) + case class Age(id: Long, age: Int) + case class Person(name: String, age: Long) + + def main(args: Array[String]) { + + val env = StreamExecutionEnvironment.getExecutionEnvironment + + //Create streams for names and ages by mapping the inputs to the corresponding objects + val names = env.addSource(nameStream _).map(x => Name(x._1, x._2)) + val ages = env.addSource(ageStream _).map(x => Age(x._1, x._2)) + + //Join the two input streams by id on the last second and create new Person objects + //containing both name and age + val joined = + names.join(ages).onWindow(1000) + .where("id").equalTo("id") { (n, a) => Person(n.name, a.age) } + + joined print + + env.execute("WindowJoin") + } + + //Stream source for generating (id, name) pairs + def nameStream(out: Collector[(Long, String)]) = { + val names = Array("tom", "jerry", "alice", "bob", "john", "grace") + + for (i <- 1 to 10000) { + if (i % 100 == 0) Thread.sleep(1000) else { + out.collect((i, names(Random.nextInt(names.length)))) + } + } + } + + //Stream source for generating (id, age) pairs + def ageStream(out: Collector[(Long, Int)]) = { + for (i <- 1 to 10000) { + if (i % 100 == 0) Thread.sleep(1000) else { + out.collect((i, Random.nextInt(90))) + } + } + } + +} http://git-wip-us.apache.org/repos/asf/flink/blob/c6b90eed/flink-scala/src/main/scala/org/apache/flink/api/scala/streaming/StreamExecutionEnvironment.scala ---------------------------------------------------------------------- diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/streaming/StreamExecutionEnvironment.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/streaming/StreamExecutionEnvironment.scala index 55f7c6c..2489a64 100644 --- a/flink-scala/src/main/scala/org/apache/flink/api/scala/streaming/StreamExecutionEnvironment.scala +++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/streaming/StreamExecutionEnvironment.scala @@ -161,9 +161,25 @@ class StreamExecutionEnvironment(javaEnv: JavaEnv) { */ def addSource[T: ClassTag: TypeInformation](function: SourceFunction[T]): DataStream[T] = { Validate.notNull(function, "Function must not be null.") + ClosureCleaner.clean(function, true) val typeInfo = implicitly[TypeInformation[T]] new DataStream[T](javaEnv.addSource(function, typeInfo)) } + + /** + * Create a DataStream using a user defined source function for arbitrary + * source functionality. + * + */ + def addSource[T: ClassTag: TypeInformation](function: Collector[T] => Unit): DataStream[T] = { + Validate.notNull(function, "Function must not be null.") + val sourceFunction = new SourceFunction[T] { + override def invoke(out: Collector[T]) { + function(out) + } + } + addSource(sourceFunction) + } /** * Triggers the program execution. The environment will execute all parts of
