[ 
https://issues.apache.org/jira/browse/FLINK-5886?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16019496#comment-16019496
 ] 

ASF GitHub Bot commented on FLINK-5886:
---------------------------------------

Github user zentol commented on a diff in the pull request:

    https://github.com/apache/flink/pull/3838#discussion_r117724239
  
    --- Diff: 
flink-libraries/flink-streaming-python/src/main/java/org/apache/flink/streaming/python/api/datastream/PythonDataStream.java
 ---
    @@ -0,0 +1,262 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.flink.streaming.python.api.datastream;
    +
    +import org.apache.flink.annotation.Public;
    +import org.apache.flink.annotation.PublicEvolving;
    +import org.apache.flink.api.common.functions.FilterFunction;
    +import org.apache.flink.api.common.functions.FlatMapFunction;
    +import org.apache.flink.api.common.functions.MapFunction;
    +import org.apache.flink.api.java.functions.KeySelector;
    +import org.apache.flink.core.fs.FileSystem.WriteMode;
    +import org.apache.flink.streaming.api.collector.selector.OutputSelector;
    +import org.apache.flink.streaming.api.datastream.DataStream;
    +import 
org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
    +import org.apache.flink.streaming.api.functions.sink.SinkFunction;
    +import org.apache.flink.streaming.python.api.functions.PyKey;
    +import 
org.apache.flink.streaming.python.api.functions.PythonFilterFunction;
    +import 
org.apache.flink.streaming.python.api.functions.PythonFlatMapFunction;
    +import org.apache.flink.streaming.python.api.functions.PythonKeySelector;
    +import org.apache.flink.streaming.python.api.functions.PythonMapFunction;
    +import 
org.apache.flink.streaming.python.api.functions.PythonOutputSelector;
    +import org.apache.flink.streaming.python.api.functions.PythonSinkFunction;
    +import 
org.apache.flink.streaming.python.util.serialization.PythonSerializationSchema;
    +import org.apache.flink.streaming.util.serialization.SerializationSchema;
    +import org.python.core.PyObject;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +
    +
    +/**
    + * A {@code PythonDataStream} is a thin wrapper layer over {@link 
DataStream}, which represents a
    + * stream of elements of the same type. A {@code PythonDataStream} can be 
transformed into
    + * another {@code PythonDataStream} by applying various transformation 
functions, such as
    + * <ul>
    + * <li>{@link PythonDataStream#map}
    + * <li>{@link PythonDataStream#split}
    + * </ul>
    + *
    + * <p>A thin wrapper layer means that the functionality itself is 
performed by the
    + * {@link DataStream}, however instead of working directly with the 
streaming data sets,
    + * this layer handles Python wrappers (e.g. {@code PythonDataStream}) to 
comply with the
    + * Python standard coding styles.</p>
    + */
    +@Public
    +public class PythonDataStream<D extends DataStream<PyObject>> {
    +   protected final D stream;
    +
    +   public PythonDataStream(D stream) {
    +           this.stream = stream;
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link DataStream#union(DataStream[])}.
    +    *
    +    * @param streams
    +    *            The Python DataStreams to union output with.
    +    * @return The {@link PythonDataStream}.
    +    */
    +   @SafeVarargs
    +   @SuppressWarnings("unchecked")
    +   public final PythonDataStream union(PythonDataStream... streams) {
    +           ArrayList<DataStream<PyObject>> dsList = new ArrayList<>();
    +           for (PythonDataStream ps : streams) {
    +                   dsList.add(ps.stream);
    +           }
    +           DataStream<PyObject>[] dsArray = new DataStream[dsList.size()];
    +           return new 
PythonDataStream(stream.union(dsList.toArray(dsArray)));
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link DataStream#split(OutputSelector)}.
    +    *
    +    * @param output_selector
    +    *            The user defined {@link OutputSelector} for directing the 
tuples.
    +    * @return The {@link PythonSplitStream}
    +    */
    +   public PythonSplitStream split(OutputSelector<PyObject> 
output_selector) throws IOException {
    +           return new PythonSplitStream(this.stream.split(new 
PythonOutputSelector(output_selector)));
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link DataStream#filter(FilterFunction)}.
    +    *
    +    * @param filter
    +    *            The FilterFunction that is called for each element of the 
DataStream.
    +    * @return The filtered {@link PythonDataStream}.
    +    */
    +   public PythonSingleOutputStreamOperator filter(FilterFunction<PyObject> 
filter) throws IOException {
    +           return new PythonSingleOutputStreamOperator(stream.filter(new 
PythonFilterFunction(filter)));
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link DataStream#map(MapFunction)}.
    +    *
    +    * @param mapper
    +    *            The MapFunction that is called for each element of the
    +    *            DataStream.
    +    * @return The transformed {@link PythonDataStream}.
    +    */
    +   public PythonDataStream<SingleOutputStreamOperator<PyObject>> map(
    +           MapFunction<PyObject, PyObject> mapper) throws IOException {
    +           return new PythonDataStream<>(stream.map(new 
PythonMapFunction(mapper)));
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link 
DataStream#flatMap(FlatMapFunction)}.
    +    *
    +    * @param flat_mapper
    +    *            The FlatMapFunction that is called for each element of the
    +    *            DataStream
    +    *
    +    * @return The transformed {@link PythonDataStream}.
    +    */
    +   public PythonDataStream<SingleOutputStreamOperator<PyObject>> flat_map(
    +           FlatMapFunction<PyObject, PyObject> flat_mapper) throws 
IOException {
    +           return new PythonDataStream<>(stream.flatMap(new 
PythonFlatMapFunction(flat_mapper)));
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link DataStream#keyBy(KeySelector)}.
    +    *
    +    * @param selector
    +    *            The KeySelector to be used for extracting the key for 
partitioning
    +    * @return The {@link PythonDataStream} with partitioned state (i.e. 
{@link PythonKeyedStream})
    +    */
    +   public PythonKeyedStream key_by(KeySelector<PyObject, PyKey> selector) 
throws IOException {
    +           return new PythonKeyedStream(stream.keyBy(new 
PythonKeySelector(selector)));
    +   }
    +
    +   /**
    +    * A thin wrapper layer over {@link DataStream#print()}.
    +    */
    +   @PublicEvolving
    +   public void print() { stream.print(); }
    --- End diff --
    
    `stream.print();` should be on a separate line.


> Python API for streaming applications
> -------------------------------------
>
>                 Key: FLINK-5886
>                 URL: https://issues.apache.org/jira/browse/FLINK-5886
>             Project: Flink
>          Issue Type: New Feature
>          Components: Python API
>            Reporter: Zohar Mizrahi
>            Assignee: Zohar Mizrahi
>
> A work in progress to provide python interface for Flink streaming APIs. The 
> core technology is based on jython and thus imposes two limitations: a. user 
> defined functions cannot use python extensions. b. the python version is 2.x
> The branch is based on Flink release 1.2.0, as can be found here:
> https://github.com/zohar-pm/flink/tree/python-streaming
> In order to test it, someone can use IntelliJ IDE. Assuming IntelliJ was 
> setup properly (see: 
> https://ci.apache.org/projects/flink/flink-docs-release-1.3/internals/ide_setup.html),
>  one can run/debug {{org.apache.flink.python.api.PythonStreamBinderTest}}, 
> which in return will execute all the tests under 
> {{/Users/zohar/dev/pm-flink/flink-libraries/flink-python/src/test/python/org/apache/flink/python/api/streaming}}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to