[ https://issues.apache.org/jira/browse/FLINK-3332?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15148439#comment-15148439 ]
ASF GitHub Bot commented on FLINK-3332: --------------------------------------- Github user zentol commented on a diff in the pull request: https://github.com/apache/flink/pull/1640#discussion_r52994610 --- Diff: flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/operators/GenericExactlyOnceSink.java --- @@ -0,0 +1,195 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.flink.streaming.runtime.operators; + +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.java.typeutils.TypeExtractor; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.runtime.io.disk.InputViewIterator; +import org.apache.flink.runtime.state.AbstractStateBackend; +import org.apache.flink.runtime.state.StateHandle; +import org.apache.flink.runtime.util.NonReusingMutableToRegularIteratorWrapper; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.StreamTaskState; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeMap; +import java.util.UUID; + +/** + * Generic Sink that emits its input elements into an arbitrary backend. This sink is integrated with the checkpointing + * mechanism to provide near exactly-once semantics. + * <p/> + * Incoming records are stored within a {@link org.apache.flink.runtime.state.AbstractStateBackend}, and only committed if a + * checkpoint is completed. Should a job fail while the data is being committed, no exactly-once guarantee can be made. 
+ * + * @param <IN> Type of the elements emitted by this sink + */ +public abstract class GenericExactlyOnceSink<IN> extends AbstractStreamOperator<IN> implements OneInputStreamOperator<IN, IN> { + private transient AbstractStateBackend.CheckpointStateOutputView out; + private TypeSerializer<IN> serializer; + protected transient TypeInformation<IN> typeInfo; + protected final CheckpointCommitter committer; + protected final String id; + + private ExactlyOnceState state = new ExactlyOnceState(); + + public GenericExactlyOnceSink(CheckpointCommitter committer) { + if (committer == null) { + throw new IllegalArgumentException("CheckpointCommitter argument must not be null."); + } + this.committer = committer; + this.id = UUID.randomUUID().toString(); + } + + @Override + public void open() throws Exception { + committer.setOperatorId(id); + committer.setOperatorSubtaskId(getRuntimeContext().getIndexOfThisSubtask()); + committer.open(); + } + + public void close() throws Exception { + committer.close(); + } + + /** + * Saves a handle in the state. 
+ * + * @param checkpointId + * @throws IOException + */ + private void saveHandleInState(final long checkpointId) throws IOException { + //only add handle if a new OperatorState was created since the last snapshot + if (out != null) { + StateHandle<DataInputView> handle = out.closeAndGetHandle(); + state.pendingHandles.put(checkpointId, handle); + out = null; + } + } + + @Override + public StreamTaskState snapshotOperatorState(final long checkpointId, final long timestamp) throws Exception { + StreamTaskState taskState = super.snapshotOperatorState(checkpointId, timestamp); + saveHandleInState(checkpointId); + taskState.setFunctionState(state); + return taskState; + } + + @Override + public void restoreState(StreamTaskState state, long recoveryTimestamp) throws Exception { + super.restoreState(state, recoveryTimestamp); + this.state = (ExactlyOnceState) state.getFunctionState(); + out = null; + } + + @Override + public void notifyOfCompletedCheckpoint(long checkpointId) throws Exception { + super.notifyOfCompletedCheckpoint(checkpointId); + + synchronized (state.pendingHandles) { + Set<Long> pastCheckpointIds = state.pendingHandles.keySet(); + Set<Long> checkpointsToRemove = new HashSet<>(); + for (Long pastCheckpointId : pastCheckpointIds) { + if (pastCheckpointId <= checkpointId) { + if (!committer.isCheckpointCommitted(pastCheckpointId)) { + StateHandle<DataInputView> handle = state.pendingHandles.get(pastCheckpointId); + DataInputView in = handle.getState(getUserCodeClassloader()); + sendValue(new NonReusingMutableToRegularIteratorWrapper<>(new InputViewIterator<>(in, serializer), serializer)); + committer.commitCheckpoint(pastCheckpointId); + } + checkpointsToRemove.add(pastCheckpointId); + } + } + for (Long toRemove : checkpointsToRemove) { + StateHandle<DataInputView> handle = state.pendingHandles.get(toRemove); + state.pendingHandles.remove(toRemove); + handle.discardState(); + } + } + } + + + /** + * Write the given element into the backend. 
+ * + * @param value value to be written + * @throws Exception + */ + protected abstract void sendValue(Iterable<IN> value) throws Exception; + + @Override + public void processElement(StreamRecord<IN> element) throws Exception { + IN value = element.getValue(); + if (serializer == null) { + typeInfo = TypeExtractor.getForObject(value); --- End diff -- So we're once again at a point where the user has to pass input TypeInformation manually; I thought we didn't want that? > Provide an exactly-once Cassandra connector > ------------------------------------------- > > Key: FLINK-3332 > URL: https://issues.apache.org/jira/browse/FLINK-3332 > Project: Flink > Issue Type: Improvement > Components: Streaming Connectors > Reporter: Robert Metzger > Assignee: Chesnay Schepler > > With FLINK-3311, we are adding a Cassandra connector to Flink. > It would be good to also provide an "exactly-once" C* connector. > I would like to first discuss how we are going to implement this in Flink. -- This message was sent by Atlassian JIRA (v6.3.4#6332)