zentol commented on a change in pull request #6477: [FLINK-10027] Add logging
to StreamingFileSink
URL: https://github.com/apache/flink/pull/6477#discussion_r207226450
##########
File path:
flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/Bucket.java
##########
@@ -53,175 +58,260 @@
private final RecoverableWriter fsWriter;
- private final Map<Long, List<RecoverableWriter.CommitRecoverable>>
pendingPerCheckpoint = new HashMap<>();
-
- private long partCounter;
-
- private PartFileWriter<IN, BucketID> currentPart;
+ private final RollingPolicy<IN, BucketID> rollingPolicy;
- private List<RecoverableWriter.CommitRecoverable> pending;
-
- /**
- * Constructor to restore a bucket from checkpointed state.
- */
- public Bucket(
- RecoverableWriter fsWriter,
- int subtaskIndex,
- long initialPartCounter,
- PartFileWriter.PartFileFactory<IN, BucketID>
partFileFactory,
- BucketState<BucketID> bucketState) throws IOException {
+ private final Map<Long, List<RecoverableWriter.CommitRecoverable>>
pendingPartsPerCheckpoint = new HashMap<>();
- this(fsWriter, subtaskIndex, bucketState.getBucketId(),
bucketState.getBucketPath(), initialPartCounter, partFileFactory);
-
- // the constructor must have already initialized the filesystem
writer
- Preconditions.checkState(fsWriter != null);
-
- // we try to resume the previous in-progress file, if the
filesystem
- // supports such operation. If not, we just commit the file and
start fresh.
+ private long partCounter;
- final RecoverableWriter.ResumeRecoverable resumable =
bucketState.getInProgress();
- if (resumable != null) {
- currentPart = partFileFactory.resumeFrom(
- bucketId, fsWriter, resumable,
bucketState.getCreationTime());
- }
+ private PartFileWriter<IN, BucketID> inProgressPart;
- // we commit pending files for previous checkpoints to the last
successful one
- // (from which we are recovering from)
- for (List<RecoverableWriter.CommitRecoverable> commitables:
bucketState.getPendingPerCheckpoint().values()) {
- for (RecoverableWriter.CommitRecoverable commitable:
commitables) {
-
fsWriter.recoverForCommit(commitable).commitAfterRecovery();
- }
- }
- }
+ private List<RecoverableWriter.CommitRecoverable>
pendingPartsForCurrentCheckpoint;
/**
* Constructor to create a new empty bucket.
*/
- public Bucket(
- RecoverableWriter fsWriter,
- int subtaskIndex,
- BucketID bucketId,
- Path bucketPath,
- long initialPartCounter,
- PartFileWriter.PartFileFactory<IN, BucketID>
partFileFactory) {
+ private Bucket(
+ final RecoverableWriter fsWriter,
+ final int subtaskIndex,
+ final BucketID bucketId,
+ final Path bucketPath,
+ final long initialPartCounter,
+ final PartFileWriter.PartFileFactory<IN, BucketID>
partFileFactory,
+ final RollingPolicy<IN, BucketID> rollingPolicy) {
this.fsWriter = Preconditions.checkNotNull(fsWriter);
this.subtaskIndex = subtaskIndex;
this.bucketId = Preconditions.checkNotNull(bucketId);
this.bucketPath = Preconditions.checkNotNull(bucketPath);
this.partCounter = initialPartCounter;
+ this.rollingPolicy = Preconditions.checkNotNull(rollingPolicy);
this.partFileFactory =
Preconditions.checkNotNull(partFileFactory);
- this.pending = new ArrayList<>();
+ this.pendingPartsForCurrentCheckpoint = new ArrayList<>();
}
/**
- * Gets the information available for the currently
- * open part file, i.e. the one we are currently writing to.
- *
- * <p>This will be null if there is no currently open part file. This
- * is the case when we have a new, just created bucket or a bucket
- * that has not received any data after the closing of its previously
- * open in-progress file due to the specified rolling policy.
- *
- * @return The information about the currently in-progress part file
- * or {@code null} if there is no open part file.
+ * Constructor to restore a bucket from checkpointed state.
*/
- public PartFileInfo<BucketID> getInProgressPartInfo() {
- return currentPart;
+ private Bucket(
+ final RecoverableWriter fsWriter,
+ final int subtaskIndex,
+ final long initialPartCounter,
+ final PartFileWriter.PartFileFactory<IN, BucketID>
partFileFactory,
+ final RollingPolicy<IN, BucketID> rollingPolicy,
+ final BucketState<BucketID> bucketState) throws
IOException {
+
+ this(
+ fsWriter,
+ subtaskIndex,
+ bucketState.getBucketId(),
+ bucketState.getBucketPath(),
+ initialPartCounter,
+ partFileFactory,
+ rollingPolicy);
+
+ restoreInProgressFile(bucketState);
+ commitRecoveredPendingFiles(bucketState);
+ }
+
+ private void restoreInProgressFile(final BucketState<BucketID> state)
throws IOException {
+ if (state.hasInProgressResumableFile()) {
+ final RecoverableWriter.ResumeRecoverable resumable =
state.getInProgressResumableFile();
+ inProgressPart = partFileFactory.resumeFrom(
+ bucketId, fsWriter, resumable,
state.getInProgressFileCreationTime());
+ }
+ }
+
+ private void commitRecoveredPendingFiles(final BucketState<BucketID>
state) throws IOException {
+ for (List<RecoverableWriter.CommitRecoverable> committables:
state.getCommittableFilesPerCheckpoint().values()) {
+ for (RecoverableWriter.CommitRecoverable committable:
committables) {
+
fsWriter.recoverForCommit(committable).commitAfterRecovery();
+ }
+ }
+ }
+
+ void merge(final Bucket<IN, BucketID> bucket) throws IOException {
Review comment:
Let's move this method (`merge`) below the getter methods.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services