[ https://issues.apache.org/jira/browse/FLINK-2105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14628034#comment-14628034 ]
ASF GitHub Bot commented on FLINK-2105: --------------------------------------- Github user fhueske commented on a diff in the pull request: https://github.com/apache/flink/pull/907#discussion_r34676135 --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/operators/sort/ReusingMergeMatchIterator.java --- @@ -18,70 +18,20 @@ package org.apache.flink.runtime.operators.sort; -import java.io.IOException; -import java.util.Iterator; -import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.flink.api.common.functions.FlatJoinFunction; import org.apache.flink.api.common.typeutils.TypeComparator; import org.apache.flink.api.common.typeutils.TypePairComparator; import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.core.memory.MemorySegment; import org.apache.flink.runtime.io.disk.iomanager.IOManager; import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable; import org.apache.flink.runtime.memorymanager.MemoryAllocationException; import org.apache.flink.runtime.memorymanager.MemoryManager; -import org.apache.flink.runtime.operators.resettable.NonReusingBlockResettableIterator; -import org.apache.flink.runtime.operators.resettable.SpillingResettableIterator; -import org.apache.flink.runtime.operators.util.JoinTaskIterator; +import org.apache.flink.runtime.util.KeyGroupedIterator; import org.apache.flink.runtime.util.ReusingKeyGroupedIterator; -import org.apache.flink.util.Collector; import org.apache.flink.util.MutableObjectIterator; -/** - * An implementation of the {@link JoinTaskIterator} that realizes the - * matching through a sort-merge join strategy. - */ -public class ReusingMergeMatchIterator<T1, T2, O> implements JoinTaskIterator<T1, T2, O> { - - /** - * The log used by this iterator to log messages. 
- */ - private static final Logger LOG = LoggerFactory.getLogger(ReusingMergeMatchIterator.class); - - // -------------------------------------------------------------------------------------------- - - private TypePairComparator<T1, T2> comp; - - private ReusingKeyGroupedIterator<T1> iterator1; - - private ReusingKeyGroupedIterator<T2> iterator2; - - private final TypeSerializer<T1> serializer1; - - private final TypeSerializer<T2> serializer2; - - private T1 copy1; - - private T1 spillHeadCopy; - - private T2 copy2; - - private T2 blockHeadCopy; - - private final NonReusingBlockResettableIterator<T2> blockIt; // for N:M cross products with same key - - private final List<MemorySegment> memoryForSpillingIterator; - - private final MemoryManager memoryManager; +public class ReusingMergeMatchIterator<T1, T2, O> extends AbstractMergeMatchIterator<T1, T2, O> { --- End diff -- Rename to ReusingMerge**InnerJoin**Iterator. > Implement Sort-Merge Outer Join algorithm > ----------------------------------------- > > Key: FLINK-2105 > URL: https://issues.apache.org/jira/browse/FLINK-2105 > Project: Flink > Issue Type: Sub-task > Components: Local Runtime > Reporter: Fabian Hueske > Assignee: Ricky Pogalz > Priority: Minor > Fix For: pre-apache > > > Flink does not natively support outer joins at the moment. > This issue proposes to implement a sort-merge outer join algorithm that can > cover left, right, and full outer joins. > The implementation can be based on the regular sort-merge join iterators > ({{ReusingMergeMatchIterator}} and {{NonReusingMergeMatchIterator}}, see also > {{MatchDriver}} class). > The Reusing and NonReusing variants differ in whether object instances are > reused or new objects are created. I would start with the NonReusing variant, > which is safer from a user's point of view and should also be easier to > implement. -- This message was sent by Atlassian JIRA (v6.3.4#6332)