[
https://issues.apache.org/jira/browse/STORM-1199?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15089527#comment-15089527
]
ASF GitHub Bot commented on STORM-1199:
---------------------------------------
Github user revans2 commented on a diff in the pull request:
https://github.com/apache/storm/pull/936#discussion_r49211367
--- Diff:
external/storm-hdfs/src/main/java/org/apache/storm/hdfs/spout/FileLock.java ---
@@ -0,0 +1,328 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.storm.hdfs.spout;
+
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.storm.hdfs.common.HdfsUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Collection;
+
+/**
+ * Facility to synchronize access to HDFS files. Thread gains exclusive
access to a file by acquiring
+ * a FileLock object. The lock itself is represented as file on HDFS.
Relies on atomic file creation.
+ * The owning thread must heartbeat periodically on the lock to prevent the
+ * lock from being deemed stale (i.e. a lock whose owning thread has died).
+ */
+public class FileLock {
+
+  // HDFS filesystem on which the lock file lives
+  private final FileSystem fs;
+  // id of the owning spout instance; recorded in every log entry
+  private final String componentID;
+  // path of the lock file on HDFS; its existence represents the lock
+  private final Path lockFile;
+  // open stream used to append heartbeat entries to the lock file
+  private final FSDataOutputStream lockFileStream;
+  // most recent entry successfully written to the lock file
+  private LogEntry lastEntry;
+
+  private static final Logger LOG =
LoggerFactory.getLogger(FileLock.class);
+
+ private FileLock(FileSystem fs, Path lockFile, FSDataOutputStream
lockFileStream, String spoutId)
+ throws IOException {
+ this.fs = fs;
+ this.lockFile = lockFile;
+ this.lockFileStream = lockFileStream;
+ this.componentID = spoutId;
+ logProgress("0", false);
+ }
+
+ private FileLock(FileSystem fs, Path lockFile, String spoutId, LogEntry
entry)
+ throws IOException {
+ this.fs = fs;
+ this.lockFile = lockFile;
+ this.lockFileStream = fs.append(lockFile);
+ this.componentID = spoutId;
+ LOG.info("Acquired abandoned lockFile {}, Spout {}", lockFile,
spoutId);
+ logProgress(entry.fileOffset, true);
+ }
+
+  /** Records progress in the lock file so the lock is not deemed stale.
+   * @param fileOffset offset (as a string) of the last processed position
+   * @throws IOException if the append/flush to HDFS fails
+   */
+  public void heartbeat(String fileOffset) throws IOException {
+    logProgress(fileOffset, true);
+  }
+
+ // new line is at beginning of each line (instead of end) for better
recovery from
+ // partial writes of prior lines
+ private void logProgress(String fileOffset, boolean prefixNewLine)
+ throws IOException {
+ long now = System.currentTimeMillis();
+ LogEntry entry = new LogEntry(now, componentID, fileOffset);
+ String line = entry.toString();
+ if(prefixNewLine) {
+ lockFileStream.writeBytes(System.lineSeparator() + line);
+ }
+ else {
+ lockFileStream.writeBytes(line);
+ }
+ lockFileStream.hflush();
+
+ lastEntry = entry; // update this only after writing to hdfs
+ }
+
+ /** Release lock by deleting file
+ * @throws IOException if lock file could not be deleted
+ */
+ public void release() throws IOException {
+ lockFileStream.close();
+ if(!fs.delete(lockFile, false)) {
+ LOG.warn("Unable to delete lock file, Spout = {}", componentID);
+ throw new IOException("Unable to delete lock file");
+ }
+ LOG.debug("Released lock file {}. Spout {}", lockFile, componentID);
+ }
+
+  // For testing only.. invoked via reflection
+  // Closes the stream without deleting the lock file, simulating an owner
+  // that died while holding the lock (so staleness detection can be tested).
+  private void forceCloseLockFile() throws IOException {
+    lockFileStream.close();
+  }
+
+ /** returns lock on file or null if file is already locked. throws if
unexpected problem */
+ public static FileLock tryLock(FileSystem fs, Path fileToLock, Path
lockDirPath, String spoutId)
+ throws IOException {
+ String lockFileName = lockDirPath.toString() + Path.SEPARATOR_CHAR +
fileToLock.getName();
+ Path lockFile = new Path(lockFileName);
+
+ try {
+ FSDataOutputStream ostream = HdfsUtils.tryCreateFile(fs, lockFile);
+ if (ostream != null) {
+ LOG.debug("Acquired lock on file {}. LockFile= {}, Spout = {}",
fileToLock, lockFile, spoutId);
+ return new FileLock(fs, lockFile, ostream, spoutId);
+ } else {
+ LOG.debug("Cannot lock file {} as its already locked. Spout = {}",
fileToLock, spoutId);
+ return null;
+ }
+ } catch (IOException e) {
+ LOG.error("Error when acquiring lock on file " + fileToLock + "
Spout = " + spoutId, e);
+ throw e;
+ }
+ }
+
+ /**
+ * checks if lockFile is older than 'olderThan' UTC time by examining
the modification time
+ * on file and (if necessary) the timestamp in last log entry in the
file. If its stale, then
+ * returns the last log entry, else returns null.
+ * @param fs
+ * @param lockFile
+ * @param olderThan time (millis) in UTC.
+ * @return the last entry in the file if its too old. null if last entry
is not too old
+ * @throws IOException
+ */
+ public static LogEntry getLastEntryIfStale(FileSystem fs, Path lockFile,
long olderThan)
+ throws IOException {
+ long modifiedTime = fs.getFileStatus(lockFile).getModificationTime();
+ if( modifiedTime <= olderThan ) { // look
+ //Impt: HDFS timestamp may not reflect recent appends, so we double
check the
+ // timestamp in last line of file to see when the last update was
made
+ LogEntry lastEntry = getLastEntry(fs, lockFile);
+ if(lastEntry==null) {
+ throw new RuntimeException(lockFile.getName() + " is empty. this
file is invalid.");
--- End diff --
Is blowing up the right thing to do when the file may be in the middle of
being written, or when the writer failed at a bad time?
> Create HDFS Spout
> -----------------
>
> Key: STORM-1199
> URL: https://issues.apache.org/jira/browse/STORM-1199
> Project: Apache Storm
> Issue Type: New Feature
> Reporter: Roshan Naik
> Assignee: Roshan Naik
> Attachments: HDFSSpoutforStorm v2.pdf, HDFSSpoutforStorm.pdf,
> hdfs-spout.1.patch
>
>
> Create an HDFS spout so that Storm can suck in data from files in a HDFS
> directory
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)