Repository: hive Updated Branches: refs/heads/master f973243a5 -> acb3ba274
HIVE-17825: Socket not closed when trying to read files to copy over in replication from metadata (Anishek Agarwal reviewed by Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/acb3ba27 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/acb3ba27 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/acb3ba27 Branch: refs/heads/master Commit: acb3ba274befb343ce30c8ac3b0c4d73bb74c0ef Parents: f973243 Author: Anishek Agarwal <anis...@gmail.com> Authored: Wed Oct 18 11:54:56 2017 +0530 Committer: Anishek Agarwal <anis...@gmail.com> Committed: Wed Oct 18 11:54:56 2017 +0530 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/ReplCopyTask.java | 55 +++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/acb3ba27/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index c69741b..80905d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -144,10 +144,13 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { if (dstFs.exists(destFile)) { String destFileWithSourceName = srcFile.getSourcePath().getName(); Path newDestFile = new Path(toPath, destFileWithSourceName); - dstFs.rename(destFile, newDestFile); + boolean result = dstFs.rename(destFile, newDestFile); + if (!result) { + throw new IllegalStateException( + "could not rename " + destFile.getName() + " to " + newDestFile.getName()); + } } } - return 0; } catch (Exception e) { console.printError("Failed with exception " + e.getMessage(), "\n" @@ -167,32 +170,32 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { } List<ReplChangeManager.FileInfo> filePaths = new ArrayList<>(); - BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fileListing))); - // TODO : verify if skipping charset here is okay - - String line = null; - while ((line = br.readLine()) != null) { - LOG.debug("ReplCopyTask :_filesReadLine:" + line); - - String[] fileWithChksum = ReplChangeManager.getFileWithChksumFromURI(line); - try { - ReplChangeManager.FileInfo f = ReplChangeManager - .getFileInfo(new Path(fileWithChksum[0]), fileWithChksum[1], conf); - filePaths.add(f); - } catch (MetaException e) { - // issue warning for missing file and throw exception - LOG.warn("Cannot find " + fileWithChksum[0] + " in source repo or cmroot"); - throw new IOException(e.getMessage()); + try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fileListing)))) { + // TODO : verify if skipping charset here is okay + + String line = null; + while ((line = br.readLine()) != null) { + LOG.debug("ReplCopyTask :_filesReadLine:" + line); + + String[] fileWithChksum = ReplChangeManager.getFileWithChksumFromURI(line); + try { + ReplChangeManager.FileInfo f = ReplChangeManager + .getFileInfo(new Path(fileWithChksum[0]), fileWithChksum[1], conf); + filePaths.add(f); + } catch (MetaException e) { + // issue warning for missing file and throw exception + LOG.warn("Cannot find " + fileWithChksum[0] + " in source repo or cmroot"); + throw new IOException(e.getMessage()); + } + // Note - we need srcFs rather than fs, because it is possible that the _files lists files + // which are from a different filesystem than the fs where the _files file itself was loaded + // from. Currently, it is possible, for eg., to do REPL LOAD hdfs://<ip>/dir/ and for the _files + // in it to contain hdfs://<name>/ entries, and/or vice-versa, and this causes errors. + // It might also be possible that there will be a mix of them in a given _files file. + // TODO: revisit close to the end of replv2 dev, to see if our assumption now still holds, + // and if not so, optimize. } - // Note - we need srcFs rather than fs, because it is possible that the _files lists files - // which are from a different filesystem than the fs where the _files file itself was loaded - // from. Currently, it is possible, for eg., to do REPL LOAD hdfs://<ip>/dir/ and for the _files - // in it to contain hdfs://<name>/ entries, and/or vice-versa, and this causes errors. - // It might also be possible that there will be a mix of them in a given _files file. - // TODO: revisit close to the end of replv2 dev, to see if our assumption now still holds, - // and if not so, optimize. } - return filePaths; }