[ 
https://issues.apache.org/jira/browse/FLINK-2105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14628034#comment-14628034
 ] 

ASF GitHub Bot commented on FLINK-2105:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/907#discussion_r34676135
  
    --- Diff: 
flink-runtime/src/main/java/org/apache/flink/runtime/operators/sort/ReusingMergeMatchIterator.java
 ---
    @@ -18,70 +18,20 @@
     
     package org.apache.flink.runtime.operators.sort;
     
    -import java.io.IOException;
    -import java.util.Iterator;
    -import java.util.List;
    -
    -import org.slf4j.Logger;
    -import org.slf4j.LoggerFactory;
    -import org.apache.flink.api.common.functions.FlatJoinFunction;
     import org.apache.flink.api.common.typeutils.TypeComparator;
     import org.apache.flink.api.common.typeutils.TypePairComparator;
     import org.apache.flink.api.common.typeutils.TypeSerializer;
    -import org.apache.flink.core.memory.MemorySegment;
     import org.apache.flink.runtime.io.disk.iomanager.IOManager;
     import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
     import org.apache.flink.runtime.memorymanager.MemoryAllocationException;
     import org.apache.flink.runtime.memorymanager.MemoryManager;
    -import 
org.apache.flink.runtime.operators.resettable.NonReusingBlockResettableIterator;
    -import 
org.apache.flink.runtime.operators.resettable.SpillingResettableIterator;
    -import org.apache.flink.runtime.operators.util.JoinTaskIterator;
    +import org.apache.flink.runtime.util.KeyGroupedIterator;
     import org.apache.flink.runtime.util.ReusingKeyGroupedIterator;
    -import org.apache.flink.util.Collector;
     import org.apache.flink.util.MutableObjectIterator;
     
     
    -/**
    - * An implementation of the {@link JoinTaskIterator} that realizes the
    - * matching through a sort-merge join strategy.
    - */
    -public class ReusingMergeMatchIterator<T1, T2, O> implements 
JoinTaskIterator<T1, T2, O> {
    -   
    -   /**
    -    * The log used by this iterator to log messages.
    -    */
    -   private static final Logger LOG = 
LoggerFactory.getLogger(ReusingMergeMatchIterator.class);
    -   
    -   // 
--------------------------------------------------------------------------------------------
    -   
    -   private TypePairComparator<T1, T2> comp;
    -   
    -   private ReusingKeyGroupedIterator<T1> iterator1;
    -
    -   private ReusingKeyGroupedIterator<T2> iterator2;
    -   
    -   private final TypeSerializer<T1> serializer1;
    -   
    -   private final TypeSerializer<T2> serializer2;
    -   
    -   private T1 copy1;
    -   
    -   private T1 spillHeadCopy;
    -   
    -   private T2 copy2;
    -   
    -   private T2 blockHeadCopy;
    -   
    -   private final NonReusingBlockResettableIterator<T2> blockIt;            
                // for N:M cross products with same key
    -   
    -   private final List<MemorySegment> memoryForSpillingIterator;
    -   
    -   private final MemoryManager memoryManager;
    +public class ReusingMergeMatchIterator<T1, T2, O> extends 
AbstractMergeMatchIterator<T1, T2, O> {
    --- End diff --
    
    Rename to ReusingMerge**InnerJoin**Iterator.


> Implement Sort-Merge Outer Join algorithm
> -----------------------------------------
>
>                 Key: FLINK-2105
>                 URL: https://issues.apache.org/jira/browse/FLINK-2105
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Local Runtime
>            Reporter: Fabian Hueske
>            Assignee: Ricky Pogalz
>            Priority: Minor
>             Fix For: pre-apache
>
>
> Flink does not natively support outer joins at the moment. 
> This issue proposes to implement a sort-merge outer join algorithm that can 
> cover left, right, and full outer joins.
> The implementation can be based on the regular sort-merge join iterators 
> ({{ReusingMergeMatchIterator}} and {{NonReusingMergeMatchIterator}}; see also 
> the {{MatchDriver}} class).
> The Reusing and NonReusing variants differ in whether object instances are 
> reused or new objects are created. I would start with the NonReusing variant, 
> which is safer from a user's point of view and should also be easier to 
> implement.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to