Github user kl0u commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2707#discussion_r86561357
  
    --- Diff: 
flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/operators/GenericWriteAheadSink.java
 ---
    @@ -77,111 +84,144 @@ public GenericWriteAheadSink(CheckpointCommitter 
committer, TypeSerializer<IN> s
        public void open() throws Exception {
                super.open();
                committer.setOperatorId(id);
    -           
committer.setOperatorSubtaskId(getRuntimeContext().getIndexOfThisSubtask());
                committer.open();
    -           cleanState();
    -           checkpointStreamFactory =
    -                           
getContainingTask().createCheckpointStreamFactory(this);
    +
    +           checkpointStreamFactory = getContainingTask()
    +                   .createCheckpointStreamFactory(this);
    +
    +           cleanRestoredHandles();
        }
     
        public void close() throws Exception {
                committer.close();
        }
     
        /**
    -    * Saves a handle in the state.
    +    * Called when a checkpoint barrier arrives. It closes any open streams 
to the backend
    +    * and marks them as pending for committing to the external, 
third-party storage system.
         *
    -    * @param checkpointId
    -    * @throws IOException
    +    * @param checkpointId the id of the latest received checkpoint.
    +    * @throws IOException in case something went wrong when handling the 
stream to the backend.
         */
        private void saveHandleInState(final long checkpointId, final long 
timestamp) throws Exception {
    +           Preconditions.checkNotNull(this.pendingCheckpoints, "The 
operator has not been properly initialized.");
    +
                //only add handle if a new OperatorState was created since the 
last snapshot
                if (out != null) {
    +                   int subtaskIdx = 
getRuntimeContext().getIndexOfThisSubtask();
                        StreamStateHandle handle = out.closeAndGetHandle();
    -                   if (state.pendingHandles.containsKey(checkpointId)) {
    +
    +                   PendingCheckpoint pendingCheckpoint = new 
PendingCheckpoint(
    +                           checkpointId, subtaskIdx, timestamp, handle);
    +
    +                   if (pendingCheckpoints.contains(pendingCheckpoint)) {
                                //we already have a checkpoint stored for that 
ID that may have been partially written,
                                //so we discard this "alternate version" and 
use the stored checkpoint
                                handle.discardState();
                        } else {
    -                           state.pendingHandles.put(checkpointId, new 
Tuple2<>(timestamp, handle));
    +                           pendingCheckpoints.add(pendingCheckpoint);
                        }
                        out = null;
                }
        }
     
        @Override
    -   public void snapshotState(FSDataOutputStream out,
    -                   long checkpointId,
    -                   long timestamp) throws Exception {
    +   public void snapshotState(FSDataOutputStream out, long checkpointId, 
long timestamp) throws Exception {
                saveHandleInState(checkpointId, timestamp);
     
    -           InstantiationUtil.serializeObject(out, state);
    +           DataOutputViewStreamWrapper outStream = new 
DataOutputViewStreamWrapper(out);
    +           outStream.writeInt(pendingCheckpoints.size());
    +           for (PendingCheckpoint pendingCheckpoint : pendingCheckpoints) {
    +                   pendingCheckpoint.serialize(outStream);
    +           }
        }
     
        @Override
        public void restoreState(FSDataInputStream in) throws Exception {
    -           this.state = InstantiationUtil.deserializeObject(in, 
getUserCodeClassloader());
    +           final DataInputViewStreamWrapper inStream = new 
DataInputViewStreamWrapper(in);
    +           int numPendingHandles = inStream.readInt();
    +           for (int i = 0; i < numPendingHandles; i++) {
    +                   
pendingCheckpoints.add(PendingCheckpoint.restore(inStream, 
getUserCodeClassloader()));
    +           }
        }
     
    -   private void cleanState() throws Exception {
    -           synchronized (this.state.pendingHandles) { //remove all handles 
that were already committed
    -                   Set<Long> pastCheckpointIds = 
this.state.pendingHandles.keySet();
    -                   Set<Long> checkpointsToRemove = new HashSet<>();
    -                   for (Long pastCheckpointId : pastCheckpointIds) {
    -                           if 
(committer.isCheckpointCommitted(pastCheckpointId)) {
    -                                   
checkpointsToRemove.add(pastCheckpointId);
    +   /**
    +    * Called at {@link #open()} to clean-up the pending handle list.
    +    * It iterates over all restored pending handles, checks which ones are 
already
    +    * committed to the outside storage system and removes them from the 
list.
    +    */
    +   private void cleanRestoredHandles() throws Exception {
    +           synchronized (pendingCheckpoints) {
    +
    +                   // for each of the pending handles...
    +                   Iterator<PendingCheckpoint> pendingCheckpointIt = 
pendingCheckpoints.iterator();
    +                   while (pendingCheckpointIt.hasNext()) {
    +
    +                           PendingCheckpoint pendingCheckpoint = 
pendingCheckpointIt.next();
    +                           long checkpointId = 
pendingCheckpoint.checkpointId;
    +                           int subtaskId = pendingCheckpoint.subtaskId;
    +
    +                           //...check if the temporary buffer is already 
committed and if yes,
    +                           // remove it from the list of pending 
checkpoints.
    +                           if (committer.isCheckpointCommitted(subtaskId, 
checkpointId)) {
    +                                   
pendingCheckpoint.stateHandle.discardState();
    +                                   pendingCheckpointIt.remove();
                                }
                        }
    -                   for (Long toRemove : checkpointsToRemove) {
    -                           this.state.pendingHandles.remove(toRemove);
    -                   }
                }
        }
     
        @Override
        public void notifyOfCompletedCheckpoint(long checkpointId) throws 
Exception {
                super.notifyOfCompletedCheckpoint(checkpointId);
     
    -           synchronized (state.pendingHandles) {
    -                   Set<Long> pastCheckpointIds = 
state.pendingHandles.keySet();
    -                   Set<Long> checkpointsToRemove = new HashSet<>();
    -                   for (Long pastCheckpointId : pastCheckpointIds) {
    +           synchronized (pendingCheckpoints) {
    +                   Iterator<PendingCheckpoint> pendingCheckpointIt = 
pendingCheckpoints.iterator();
    +                   while (pendingCheckpointIt.hasNext()) {
    +
    +                           PendingCheckpoint pendingCheckpoint = 
pendingCheckpointIt.next();
    +                           long pastCheckpointId = 
pendingCheckpoint.checkpointId;
    +                           int subtaskId = pendingCheckpoint.subtaskId;
    +                           long timestamp = pendingCheckpoint.timestamp;
    +                           StreamStateHandle streamHandle = 
pendingCheckpoint.stateHandle;
    +
                                if (pastCheckpointId <= checkpointId) {
                                        try {
    -                                           if 
(!committer.isCheckpointCommitted(pastCheckpointId)) {
    -                                                   Tuple2<Long, 
StreamStateHandle> handle = state.pendingHandles.get(pastCheckpointId);
    -                                                   try (FSDataInputStream 
in = handle.f1.openInputStream()) {
    +                                           if 
(!committer.isCheckpointCommitted(subtaskId, pastCheckpointId)) {
    +                                                   try (FSDataInputStream 
in = streamHandle.openInputStream()) {
                                                                boolean success 
= sendValues(
                                                                                
new ReusingMutableToRegularIteratorWrapper<>(
                                                                                
                new InputViewIterator<>(
                                                                                
                                new DataInputViewStreamWrapper(
                                                                                
                                                in),
                                                                                
                                serializer),
                                                                                
                serializer),
    -                                                                           
handle.f0);
    -                                                           if (success) { 
//if the sending has failed we will retry on the next notify
    -                                                                   
committer.commitCheckpoint(pastCheckpointId);
    -                                                                   
checkpointsToRemove.add(pastCheckpointId);
    +                                                                           
timestamp);
    +                                                           if (success) {
    +
    +                                                                   // in 
case the checkpoint was successfully committed,
    +                                                                   // 
discard its state from the backend and mark it for removal
    +                                                                   // in 
case it failed, we retry on the next checkpoint
    +                                                                   
committer.commitCheckpoint(subtaskId, pastCheckpointId);
    +                                                                   
streamHandle.discardState();
    +                                                                   
pendingCheckpointIt.remove();
    --- End diff --
    
    No "The iterators returned by this class's iterator method are fail-fast: 
if the set is modified at any time after the iterator is created, in any way 
**except through the iterator's own remove method**" from the same javadocs.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to