HIVE-15212 : merge branch into master - clean up some pre-merge TODOs (Sergey Shelukhin)
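The cleanup below applies one pattern throughout: the temporary LOG14535 logger becomes a named FILE_OP_LOGGER ("FileOperations"), and trace-level messages are wrapped in isTraceEnabled() guards so the message strings are not concatenated unless tracing is actually on. A minimal self-contained sketch of that pattern (the example class and method here are hypothetical; the logger name and guard match the diff):

    import org.apache.hadoop.fs.Path;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    class FileOpLoggingSketch {
      // Same named logger the diff introduces in Utilities.java.
      static final Logger FILE_OP_LOGGER = LoggerFactory.getLogger("FileOperations");

      void logMove(Path src, Path dst) {
        // The guard skips the string concatenation entirely when TRACE is off,
        // which is why the diff repeats it around every trace() call.
        if (FILE_OP_LOGGER.isTraceEnabled()) {
          FILE_OP_LOGGER.trace("Moving tmp dir: " + src + " to: " + dst);
        }
      }
    }

To actually surface these messages, the FileOperations logger would need TRACE level in the logging configuration; assuming log4j2 properties syntax, something like:

    logger.fileops.name = FileOperations
    logger.fileops.level = TRACE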
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55d9ceee Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55d9ceee Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55d9ceee Branch: refs/heads/hive-14535 Commit: 55d9ceeea314763ff1db32be8f8c7452963ec831 Parents: 379d9ba Author: sergey <[email protected]> Authored: Wed Sep 27 14:20:46 2017 -0700 Committer: sergey <[email protected]> Committed: Wed Sep 27 14:20:46 2017 -0700 ---------------------------------------------------------------------- .../hive/ql/exec/AbstractFileMergeOperator.java | 14 ++- .../apache/hadoop/hive/ql/exec/CopyTask.java | 4 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 16 ++- .../hadoop/hive/ql/exec/FetchOperator.java | 8 +- .../hadoop/hive/ql/exec/FileSinkOperator.java | 56 ++++++---- .../hadoop/hive/ql/exec/ImportCommitTask.java | 5 +- .../hadoop/hive/ql/exec/JoinOperator.java | 5 +- .../apache/hadoop/hive/ql/exec/MoveTask.java | 32 +++--- .../hive/ql/exec/OrcFileMergeOperator.java | 2 +- .../hadoop/hive/ql/exec/StatsNoJobTask.java | 11 +- .../apache/hadoop/hive/ql/exec/StatsTask.java | 5 +- .../apache/hadoop/hive/ql/exec/Utilities.java | 102 ++++++++++++------- .../hive/ql/io/CombineHiveInputFormat.java | 8 +- .../hadoop/hive/ql/io/HiveInputFormat.java | 26 ++--- .../apache/hadoop/hive/ql/metadata/Hive.java | 57 +++++++---- .../formatting/TextMetaDataFormatter.java | 1 - .../ql/optimizer/AbstractBucketJoinProc.java | 2 +- .../hive/ql/optimizer/GenMapRedUtils.java | 32 ++++-- .../optimizer/unionproc/UnionProcFactory.java | 5 +- .../hadoop/hive/ql/parse/GenTezUtils.java | 6 +- .../hive/ql/parse/ImportSemanticAnalyzer.java | 15 +-- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 15 ++- .../hadoop/hive/ql/parse/TaskCompiler.java | 9 +- .../ql/plan/ConditionalResolverMergeFiles.java | 15 ++- .../hadoop/hive/ql/plan/LoadFileDesc.java | 4 +- .../hadoop/hive/ql/plan/LoadTableDesc.java | 17 ++-- .../apache/hadoop/hive/ql/plan/MoveWork.java | 4 +- .../hive/ql/stats/fs/FSStatsAggregator.java | 7 +- .../hive/ql/stats/fs/FSStatsPublisher.java | 7 +- 29 files changed, 300 insertions(+), 190 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index 6b98ecd..b163a1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -123,8 +123,10 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc> finalPath = new Path(tp, taskId); outPath = new Path(ttp, Utilities.toTempPath(taskId)); } - Utilities.LOG14535.info("Paths for merge " + taskId + ": tmp " + tmpPath + ", task " - + taskTmpPath + ", final " + finalPath + ", out " + outPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Paths for merge " + taskId + ": tmp " + tmpPath + ", task " + + taskTmpPath + ", final " + finalPath + ", out " + outPath); + } } /** @@ -173,7 +175,9 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc> Path newTmpPath = new Path(tmpPath, newPath); if (!fs.exists(newTmpPath)) { - 
Utilities.LOG14535.info("Creating " + newTmpPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Creating " + newTmpPath); + } fs.mkdirs(newTmpPath); } @@ -199,7 +203,9 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc> } protected void fixTmpPath(Path path) throws IOException { - Utilities.LOG14535.info("Calling fixTmpPath with " + path); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Calling fixTmpPath with " + path); + } // Fix temp path for alter table ... concatenate if (isListBucketingAlterTableConcatenate) { if (this.tmpPathFixedConcatenate) { http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java index 1f223f5..7299ed1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java @@ -113,7 +113,9 @@ public class CopyTask extends Task<CopyWork> implements Serializable { if (mmDirs == null || mmDirs.length == 0) return null; List<FileStatus> allFiles = new ArrayList<FileStatus>(); for (FileStatus mmDir : mmDirs) { - Utilities.LOG14535.info("Found source MM directory " + mmDir.getPath()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Found source MM directory " + mmDir.getPath()); + } matchFilesOneDir(fs, mmDir.getPath(), allFiles); } return allFiles.toArray(new FileStatus[allFiles.size()]); http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 8414a78..9502635 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -4162,7 +4162,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable { Path src = allMmDirs.get(i); Path tgt = src.getParent(); String prefix = src.getName().substring(prefixLen + 1) + "_"; - Utilities.LOG14535.info("Will move " + src + " to " + tgt + " (prefix " + prefix + ")"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt + " (prefix " + prefix + ")"); + } targetPaths.add(tgt); targetPrefix.add(prefix); } @@ -4214,7 +4216,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable { } private static void ensureDelete(FileSystem fs, Path path, String what) throws IOException { - Utilities.LOG14535.info("Deleting " + what + " " + path); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Deleting " + what + " " + path); + } try { if (!fs.delete(path, true)) throw new IOException("delete returned false"); } catch (Exception ex) { @@ -4255,14 +4259,18 @@ public class DDLTask extends Task<DDLWork> implements Serializable { Path src = part.getDataLocation(), tgt = new Path(src, mmDir); srcs.add(src); tgts.add(tgt); - Utilities.LOG14535.info("Will move " + src + " to " + tgt); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt); + } } } else { checkMmLb(tbl); Path src = 
tbl.getDataLocation(), tgt = new Path(src, mmDir); srcs.add(src); tgts.add(tgt); - Utilities.LOG14535.info("Will move " + src + " to " + tgt); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt); + } } // Don't set inputs and outputs - the locks have already been taken so it's pointless. MoveWork mw = new MoveWork(null, null, null, null, false, SessionState.get().getLineageState()); http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index d2d9946..d09b12b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -269,7 +269,6 @@ public class FetchOperator implements Serializable { while (iterPath.hasNext()) { currPath = iterPath.next(); currDesc = iterPartDesc.next(); - Utilities.LOG14535.debug("Considering " + currPath); if (isNonNativeTable) { return true; } @@ -286,7 +285,6 @@ public class FetchOperator implements Serializable { } } } - Utilities.LOG14535.debug("Done with all the paths"); return false; } @@ -381,7 +379,9 @@ public class FetchOperator implements Serializable { Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job); InputFormat inputFormat = getInputFormatFromCache(formatter, job); String inputs = processCurrPathForMmWriteIds(inputFormat); - Utilities.LOG14535.info("Setting fetch inputs to " + inputs); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Setting fetch inputs to " + inputs); + } if (inputs == null) return null; job.set("mapred.input.dir", inputs); @@ -414,7 +414,7 @@ public class FetchOperator implements Serializable { validTxnList = null; // non-MM case } if (validTxnList != null) { - Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + validTxnList); + Utilities.FILE_OP_LOGGER.info("Processing " + currDesc.getTableName() + " for MM paths"); } Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, validTxnList); http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3544884..280d3cf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -185,9 +185,11 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements txnId = conf.getTransactionId(); stmtId = conf.getStatementId(); } - Utilities.LOG14535.info("new FSPaths for " + numFiles + " files, dynParts = " + bDynParts + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("new FSPaths for " + numFiles + " files, dynParts = " + bDynParts + ": tmpPath " + tmpPath + ", task path " + taskOutputTempPath + " (spec path " + specPath + ")"); + } outPaths = new Path[numFiles]; finalPaths = new Path[numFiles]; @@ -254,7 +256,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements throws IOException, HiveException { 
if ((bDynParts || isSkewedStoredAsSubDirectories) && !fs.exists(finalPaths[idx].getParent())) { - Utilities.LOG14535.info("commit making path for dyn/skew: " + finalPaths[idx].getParent()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("commit making path for dyn/skew: " + finalPaths[idx].getParent()); + } FileUtils.mkdir(fs, finalPaths[idx].getParent(), hconf); } // If we're updating or deleting there may be no file to close. This can happen @@ -266,7 +270,10 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements boolean needToRename = outPaths[idx] != null && ((acidOp != Operation.UPDATE && acidOp != Operation.DELETE) || fs.exists(outPaths[idx])); if (needToRename && outPaths[idx] != null) { - Utilities.LOG14535.info("committing " + outPaths[idx] + " to " + finalPaths[idx] + " (" + isMmTable + ")"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("committing " + outPaths[idx] + " to " + + finalPaths[idx] + " (" + isMmTable + ")"); + } if (isMmTable) { assert outPaths[idx].equals(finalPaths[idx]); commitPaths.add(outPaths[idx]); @@ -381,14 +388,10 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } public void addToStat(String statType, long amount) { - if ("rowCount".equals(statType)) { - Utilities.LOG14535.info("Adding " + statType + " = " + amount + " to " + System.identityHashCode(this)); - } stat.addToStat(statType, amount); } public Collection<String> getStoredStats() { - Utilities.LOG14535.info("Getting stats from " + System.identityHashCode(this)); return stat.getStoredStats(); } } // class FSPaths @@ -459,8 +462,10 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements unionPath = null; } } - Utilities.LOG14535.info("Setting up FSOP " + System.identityHashCode(this) + " (" + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Setting up FSOP " + System.identityHashCode(this) + " (" + conf.isLinkedFileSink() + ") with " + taskId + " and " + specPath + " + " + unionPath); + } } /** Kryo ctor. 
*/ @@ -544,9 +549,11 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements if (!bDynParts) { fsp = new FSPaths(specPath, conf.isMmTable()); - Utilities.LOG14535.info("creating new paths " + System.identityHashCode(fsp) + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("creating new paths " + System.identityHashCode(fsp) + " from ctor; childSpec " + unionPath + ": tmpPath " + fsp.getTmpPath() + ", task path " + fsp.getTaskOutputTempPath()); + } // Create all the files - this is required because empty files need to be created for // empty buckets @@ -719,10 +726,11 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements throws HiveException { try { fsp.initializeBucketPaths(filesIdx, taskId, isNativeTable, isSkewedStoredAsSubDirectories); - Utilities.LOG14535.info("createBucketForFileIdx " + filesIdx + ": final path " + fsp.finalPaths[filesIdx] + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("createBucketForFileIdx " + filesIdx + ": final path " + fsp.finalPaths[filesIdx] + "; out path " + fsp.outPaths[filesIdx] +" (spec path " + specPath + ", tmp path " - + fsp.getTmpPath() + ", task " + taskId + ")"/*, new Exception()*/); - + + fsp.getTmpPath() + ", task " + taskId + ")"); + } if (LOG.isInfoEnabled()) { LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]); } @@ -1002,7 +1010,6 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements protected FSPaths lookupListBucketingPaths(String lbDirName) throws HiveException { FSPaths fsp2 = valToPaths.get(lbDirName); if (fsp2 == null) { - Utilities.LOG14535.info("lookupListBucketingPaths for " + lbDirName); fsp2 = createNewPaths(lbDirName); } return fsp2; @@ -1018,9 +1025,11 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements private FSPaths createNewPaths(String dirName) throws HiveException { FSPaths fsp2 = new FSPaths(specPath, conf.isMmTable()); fsp2.configureDynPartPath(dirName, !conf.isMmTable() && isUnionDp ? 
unionPath : null); - Utilities.LOG14535.info("creating new paths " + System.identityHashCode(fsp2) + " for " + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("creating new paths " + System.identityHashCode(fsp2) + " for " + dirName + ", childSpec " + unionPath + ": tmpPath " + fsp2.getTmpPath() + ", task path " + fsp2.getTaskOutputTempPath()); + } if(!conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { createBucketFiles(fsp2); valToPaths.put(dirName, fsp2); @@ -1286,7 +1295,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements specPath = conf.getParentDir(); unionSuffix = conf.getDirName().getName(); } - Utilities.LOG14535.info("jobCloseOp using specPath " + specPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("jobCloseOp using specPath " + specPath); + } if (!conf.isMmTable()) { Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf, reporter); } else { @@ -1352,7 +1363,6 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } private void publishStats() throws HiveException { - Utilities.LOG14535.error("FSOP publishStats called."); boolean isStatsReliable = conf.isStatsReliable(); // Initializing a stats publisher @@ -1383,8 +1393,10 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) { String fspKey = entry.getKey(); // DP/LB FSPaths fspValue = entry.getValue(); - Utilities.LOG14535.info("Observing entry for stats " + fspKey + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Observing entry for stats " + fspKey + " => FSP with tmpPath " + fspValue.getTmpPath()); + } // for bucketed tables, hive.optimize.sort.dynamic.partition optimization // adds the taskId to the fspKey. if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { @@ -1397,7 +1409,6 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements // not be retrieved from staging table and hence not aggregated. To avoid this issue // we will remove the taskId from the key which is redundant anyway. fspKey = fspKey.split(taskID)[0]; - Utilities.LOG14535.info("Adjusting fspKey for stats to " + fspKey); } // split[0] = DP, split[1] = LB @@ -1409,14 +1420,17 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements String prefix = conf.getTableInfo().getTableName().toLowerCase(); prefix = Utilities.join(prefix, spSpec, dpSpec); prefix = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR; - Utilities.LOG14535.info("Prefix for stats " + prefix + " (from " + spSpec + ", " + dpSpec + ")"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace( + "Prefix for stats " + prefix + " (from " + spSpec + ", " + dpSpec + ")"); + } Map<String, String> statsToPublish = new HashMap<String, String>(); for (String statType : fspValue.getStoredStats()) { statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType))); } if (!statsPublisher.publishStat(prefix, statsToPublish)) { - Utilities.LOG14535.error("Failed to publish stats"); + LOG.error("Failed to publish stats"); // The original exception is lost. 
// Not changing the interface to maintain backward compatibility if (isStatsReliable) { @@ -1426,7 +1440,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } sContext.setIndexForTezUnion(this.getIndexForTezUnion()); if (!statsPublisher.closeConnection(sContext)) { - Utilities.LOG14535.error("Failed to close stats"); + LOG.error("Failed to close stats"); // The original exception is lost. // Not changing the interface to maintain backward compatibility if (isStatsReliable) { http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java index 27db9a4..b54d457 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java @@ -33,11 +33,12 @@ public class ImportCommitTask extends Task<ImportCommitWork> { @Override public int execute(DriverContext driverContext) { - Utilities.LOG14535.info("Executing ImportCommit for " + work.getTxnId()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Executing ImportCommit for " + work.getTxnId()); + } try { if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { - Utilities.LOG14535.info("Exiting due to explain"); return 0; } return 0; http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java index 3f5cdf5..65b2f87 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java @@ -231,13 +231,12 @@ public class JoinOperator extends CommonJoinOperator<JoinDesc> implements Serial // Step1: rename tmp output folder to intermediate path. After this // point, updates from speculative tasks still writing to tmpPath // will not appear in finalPath. 
- log.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath); - Utilities.LOG14535.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath + "(spec " + specPath + ")"); + Utilities.FILE_OP_LOGGER.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath + "(spec " + specPath + ")"); Utilities.rename(fs, tmpPath, intermediatePath); // Step2: remove any tmp file or double-committed output files Utilities.removeTempOrDuplicateFiles(fs, intermediatePath); // Step3: move to the file destination - log.info("Moving tmp dir: " + intermediatePath + " to: " + specPath); + Utilities.FILE_OP_LOGGER.info("Moving tmp dir: " + intermediatePath + " to: " + specPath); Utilities.renameOrMoveFiles(fs, intermediatePath, specPath); } } else { http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index ed527b8..e644845 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -267,9 +267,11 @@ public class MoveTask extends Task<MoveWork> implements Serializable { @Override public int execute(DriverContext driverContext) { if (work.isNoop()) return 0; - Utilities.LOG14535.info("Executing MoveWork " + System.identityHashCode(work) + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Executing MoveWork " + System.identityHashCode(work) + " with " + work.getLoadFileWork() + "; " + work.getLoadTableWork() + "; " + work.getLoadMultiFilesWork()); + } try { if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { @@ -284,9 +286,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); if (targetPath.equals(sourcePath)) { - Utilities.LOG14535.info("MoveTask not moving LFD " + sourcePath); + Utilities.FILE_OP_LOGGER.debug("MoveTask not moving " + sourcePath); } else { - Utilities.LOG14535.info("MoveTask moving LFD " + sourcePath + " to " + targetPath); + Utilities.FILE_OP_LOGGER.debug("MoveTask moving " + sourcePath + " to " + targetPath); if(lfd.getWriteType() == AcidUtils.Operation.INSERT) { //'targetPath' is table root of un-partitioned table/partition //'sourcePath' result of 'select ...' 
part of CTAS statement @@ -316,7 +318,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable { if (!destFs.exists(destPath.getParent())) { destFs.mkdirs(destPath.getParent()); } - Utilities.LOG14535.info("MoveTask moving LMFD " + srcPath + " to " + destPath); + Utilities.FILE_OP_LOGGER.debug("MoveTask moving (multi-file) " + srcPath + " to " + destPath); moveFile(srcPath, destPath, isDfsDir); } else { if (!destFs.exists(destPath)) { @@ -328,11 +330,11 @@ public class MoveTask extends Task<MoveWork> implements Serializable { for (FileStatus child : children) { Path childSrc = child.getPath(); Path childDest = new Path(destPath, filePrefix + childSrc.getName()); - Utilities.LOG14535.info("MoveTask moving LMFD " + childSrc + " to " + childDest); + Utilities.FILE_OP_LOGGER.debug("MoveTask moving (multi-file) " + childSrc + " to " + childDest); moveFile(childSrc, childDest, isDfsDir); } } else { - Utilities.LOG14535.info("MoveTask skipping empty directory LMFD " + srcPath); + Utilities.FILE_OP_LOGGER.debug("MoveTask skipping empty directory (multi-file) " + srcPath); } if (!srcFs.delete(srcPath, false)) { throw new IOException("Couldn't delete " + srcPath + " after moving all the files"); @@ -356,7 +358,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { mesg.append(')'); } String mesg_detail = " from " + tbd.getSourcePath(); - Utilities.LOG14535.info("" + mesg.toString() + " " + mesg_detail); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace(mesg.toString() + " " + mesg_detail); + } console.printInfo(mesg.toString(), mesg_detail); Table table = db.getTable(tbd.getTable().getTableName()); @@ -369,10 +373,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { DataContainer dc = null; if (tbd.getPartitionSpec().size() == 0) { dc = new DataContainer(table.getTTable()); - Utilities.LOG14535.info("loadTable called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName()); - if (tbd.isMmTable() && !tbd.isCommitMmWrite()) { - throw new HiveException( - "Only single-partition LoadTableDesc can skip commiting write ID"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("loadTable called from " + tbd.getSourcePath() + + " into " + tbd.getTable().getTableName()); } db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isFullAcidOp, hasFollowingStatsTask(), @@ -448,8 +451,10 @@ public class MoveTask extends Task<MoveWork> implements Serializable { TaskInformation ti) throws HiveException, IOException, InvalidOperationException { List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec()); db.validatePartitionNameCharacters(partVals); - Utilities.LOG14535.info("loadPartition called from " + tbd.getSourcePath() + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("loadPartition called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName()); + } db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), @@ -488,9 +493,6 @@ public class MoveTask extends Task<MoveWork> implements Serializable { // iterate over it and call loadPartition() here. // The reason we don't do inside HIVE-1361 is the latter is large and we // want to isolate any potential issue it may introduce. 
- if (tbd.isMmTable() && !tbd.isCommitMmWrite()) { - throw new HiveException("Only single-partition LoadTableDesc can skip commiting write ID"); - } Map<Map<String, String>, Partition> dp = db.loadDynamicPartitions( tbd.getSourcePath(), http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java index a1ad243..50160a9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -96,7 +96,7 @@ public class OrcFileMergeOperator extends filePath = k.getInputPath().toUri().getPath(); - Utilities.LOG14535.info("OrcFileMergeOperator processing " + filePath); + Utilities.FILE_OP_LOGGER.info("OrcFileMergeOperator processing " + filePath); fixTmpPath(k.getInputPath().getParent()); http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java index 65363ed..ad5a96b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java @@ -239,7 +239,7 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable Map<String, String> parameters = tTable.getParameters(); try { Path dir = new Path(tTable.getSd().getLocation()); - Utilities.LOG14535.info("Aggregating stats for " + dir); + LOG.debug("Aggregating stats for " + dir); long numRows = 0; long rawDataSize = 0; long fileSize = 0; @@ -249,7 +249,7 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable boolean statsAvailable = false; for(FileStatus file: fileList) { - Utilities.LOG14535.info("Computing stats for " + file); + LOG.debug("Computing stats for " + file); if (!file.isDir()) { InputFormat<?, ?> inputFormat = ReflectionUtil.newInstance( table.getInputFormatClass(), jc); @@ -286,11 +286,10 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable db.alterTable(tableFullName, new Table(tTable), environmentContext); String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']'; - Utilities.LOG14535.debug(msg); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace(msg); + } console.printInfo(msg); - } else { - String msg = "Table " + tableFullName + " does not provide stats."; - Utilities.LOG14535.debug(msg); } } catch (Exception e) { console.printInfo("[Warning] could not update stats for " + tableFullName + ".", http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index 91a9c88..1725629 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -217,7 +217,10 @@ public class StatsTask extends Task<StatsWork> implements Serializable { if 
(conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); } - Utilities.LOG14535.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace( + "Table " + tableFullName + " stats: [" + toString(parameters) + ']'); + } } else { // Partitioned table: // Need to get the old stats of the partition http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index b1c7844..cb3cb03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -220,9 +220,12 @@ import java.util.concurrent.Executors; */ @SuppressWarnings({ "nls", "deprecation" }) public final class Utilities { - - // TODO# remove when merging; convert some statements to local loggers, remove others - public static final Logger LOG14535 = LoggerFactory.getLogger("Log14535"); + /** + * A logger mostly used to trace-log the details of Hive table file operations. Filtering the + * logs for FileOperations (with trace logging enabled) allows one to debug what Hive has done + * with various files and directories while committing writes, as well as while reading. + */ + public static final Logger FILE_OP_LOGGER = LoggerFactory.getLogger("FileOperations"); /** * The object in the reducer are composed of these top level fields. @@ -1475,7 +1478,9 @@ public final class Utilities { perfLogger.PerfLogEnd("FileSinkOperator", "CreateEmptyBuckets"); } // move to the file destination - Utilities.LOG14535.info("Moving tmp dir: " + tmpPath + " to: " + specPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Moving tmp dir: " + tmpPath + " to: " + specPath); + } perfLogger.PerfLogBegin("FileSinkOperator", "RenameOrMoveFiles"); if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVE_EXEC_MOVE_FILES_FROM_SOURCE_DIR)) { // HIVE-17113 - avoid copying files that may have been written to the temp dir by runaway tasks, @@ -1487,10 +1492,14 @@ public final class Utilities { perfLogger.PerfLogEnd("FileSinkOperator", "RenameOrMoveFiles"); } } else { - Utilities.LOG14535.info("deleting tmpPath " + tmpPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("deleting tmpPath " + tmpPath); + } fs.delete(tmpPath, true); } - Utilities.LOG14535.info("deleting taskTmpPath " + taskTmpPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("deleting taskTmpPath " + taskTmpPath); + } fs.delete(taskTmpPath, true); } @@ -1533,7 +1542,9 @@ public final class Utilities { } for (Path path : paths) { - Utilities.LOG14535.info("creating empty bucket for " + path); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("creating empty bucket for " + path); + } RecordWriter writer = HiveFileFormatUtils.getRecordWriter( jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter); @@ -1610,7 +1621,6 @@ public final class Utilities { assert parts[i].isDirectory() : "dynamic partition " + parts[i].getPath() + " is not a directory"; Path path = parts[i].getPath(); - Utilities.LOG14535.info("removeTempOrDuplicateFiles looking at 
DP " + path); if (removeEmptyDpDirectory(fs, path)) { parts[i] = null; continue; @@ -1622,7 +1632,10 @@ public final class Utilities { if (!mmDir.getName().equals(AcidUtils.deltaSubdir(txnId, txnId, stmtId))) { throw new IOException("Unexpected non-MM directory name " + mmDir); } - Utilities.LOG14535.info("removeTempOrDuplicateFiles processing files in MM directory " + mmDir); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace( + "removeTempOrDuplicateFiles processing files in MM directory " + mmDir); + } } taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs); if (filesKept != null && taskIDToFile != null) { @@ -1650,8 +1663,10 @@ public final class Utilities { if (!mmDir.getName().equals(AcidUtils.deltaSubdir(txnId, txnId, stmtId))) { throw new IOException("Unexpected non-MM directory " + mmDir); } - Utilities.LOG14535.info( + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace( "removeTempOrDuplicateFiles processing files in MM directory " + mmDir); + } taskIDToFile = removeTempOrDuplicateFilesNonMm(fs.listStatus(mmDir), fs); if (filesKept != null && taskIDToFile != null) { addFilesToPathSet(taskIDToFile.values(), filesKept); @@ -1687,7 +1702,6 @@ public final class Utilities { HashMap<String, FileStatus> taskIDToFile, int numBuckets, List<Path> result) { String taskID1 = taskIDToFile.keySet().iterator().next(); Path bucketPath = taskIDToFile.values().iterator().next().getPath(); - Utilities.LOG14535.info("Bucket path " + bucketPath); for (int j = 0; j < numBuckets; ++j) { addBucketFileIfMissing(result, taskIDToFile, taskID1, bucketPath, j); } @@ -1700,7 +1714,9 @@ public final class Utilities { // create empty bucket, file name should be derived from taskID2 URI bucketUri = bucketPath.toUri(); String path2 = replaceTaskIdFromFilename(bucketUri.getPath().toString(), j); - Utilities.LOG14535.info("Creating an empty bucket file " + path2); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Creating an empty bucket file " + path2); + } result.add(new Path(bucketUri.getScheme(), bucketUri.getAuthority(), path2)); } } @@ -1714,7 +1730,9 @@ public final class Utilities { for (FileStatus one : files) { if (isTempPath(one)) { - Utilities.LOG14535.info("removeTempOrDuplicateFiles deleting " + one.getPath()/*, new Exception()*/); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("removeTempOrDuplicateFiles deleting " + one.getPath()); + } if (!fs.delete(one.getPath(), true)) { throw new IOException("Unable to delete tmp file: " + one.getPath()); } @@ -1729,7 +1747,10 @@ public final class Utilities { private static void ponderRemovingTempOrDuplicateFile(FileSystem fs, FileStatus file, HashMap<String, FileStatus> taskIdToFile) throws IOException { String taskId = getPrefixedTaskIdFromFilename(file.getPath().getName()); - Utilities.LOG14535.info("removeTempOrDuplicateFiles pondering " + file.getPath() + ", taskId " + taskId); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("removeTempOrDuplicateFiles looking at " + + file.getPath() + ", taskId " + taskId); + } FileStatus otherFile = taskIdToFile.get(taskId); taskIdToFile.put(taskId, (otherFile == null) ? 
file : @@ -2601,10 +2622,13 @@ public final class Utilities { // generate a full partition specification LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>(partSpec); if (!Warehouse.makeSpecFromName(fullPartSpec, partPath, new HashSet<String>(partSpec.keySet()))) { - Utilities.LOG14535.warn("Ignoring invalid DP directory " + partPath); + Utilities.FILE_OP_LOGGER.warn("Ignoring invalid DP directory " + partPath); continue; } - Utilities.LOG14535.info("Adding partition spec from " + partPath + ": " + fullPartSpec); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Adding partition spec from " + + partPath + ": " + fullPartSpec); + } fullPartSpecs.add(fullPartSpec); } return fullPartSpecs; @@ -4037,14 +4061,16 @@ public final class Utilities { + " (when shortened to " + dir + ")"); } String subDir = dir.substring(relRoot.length()); - Utilities.LOG14535.info("Looking at " + subDir + " from " + lfs.getPath()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Looking at " + subDir + " from " + lfs.getPath()); + } // If sorted, we'll skip a bunch of files. if (lastRelDir != null && subDir.startsWith(lastRelDir)) continue; int startIx = skipLevels > 0 ? -1 : 0; for (int i = 0; i < skipLevels; ++i) { startIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1); if (startIx == -1) { - Utilities.LOG14535.info("Expected level of nesting (" + skipLevels + ") is not " + Utilities.FILE_OP_LOGGER.info("Expected level of nesting (" + skipLevels + ") is not " + " present in " + subDir + " (from " + lfs.getPath() + ")"); break; } @@ -4052,13 +4078,12 @@ public final class Utilities { if (startIx == -1) continue; int endIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1); if (endIx == -1) { - Utilities.LOG14535.info("Expected level of nesting (" + (skipLevels + 1) + ") is not " + Utilities.FILE_OP_LOGGER.info("Expected level of nesting (" + (skipLevels + 1) + ") is not " + " present in " + subDir + " (from " + lfs.getPath() + ")"); continue; } lastRelDir = subDir = subDir.substring(0, endIx); Path candidate = new Path(relRoot, subDir); - Utilities.LOG14535.info("Considering MM directory candidate " + candidate); if (!filter.accept(candidate)) continue; results.add(fs.makeQualified(candidate)); } @@ -4073,7 +4098,6 @@ public final class Utilities { } sb.append(Path.SEPARATOR).append(AcidUtils.deltaSubdir(txnId, txnId, stmtId)); Path pathPattern = new Path(path, sb.toString()); - Utilities.LOG14535.info("Looking for files via: " + pathPattern); return statusToPath(fs.globStatus(pathPattern, filter)); } @@ -4084,11 +4108,11 @@ public final class Utilities { fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf); if (files != null) { for (Path path : files) { - Utilities.LOG14535.info("Deleting " + path + " on failure"); + Utilities.FILE_OP_LOGGER.info("Deleting " + path + " on failure"); tryDelete(fs, path); } } - Utilities.LOG14535.info("Deleting " + manifestDir + " on failure"); + Utilities.FILE_OP_LOGGER.info("Deleting " + manifestDir + " on failure"); fs.delete(manifestDir, true); } @@ -4099,7 +4123,7 @@ public final class Utilities { // We assume one FSOP per task (per specPath), so we create it in specPath. 
Path manifestPath = getManifestDir(specPath, txnId, stmtId, unionSuffix); manifestPath = new Path(manifestPath, taskId + MANIFEST_EXTENSION); - Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths); + Utilities.FILE_OP_LOGGER.info("Writing manifest to " + manifestPath + " with " + commitPaths); try { // Don't overwrite the manifest... should fail if we have collisions. try (FSDataOutputStream out = fs.create(manifestPath, false)) { @@ -4144,7 +4168,7 @@ public final class Utilities { return; } - Utilities.LOG14535.info("Looking for manifests in: " + manifestDir + " (" + txnId + ")"); + Utilities.FILE_OP_LOGGER.debug("Looking for manifests in: " + manifestDir + " (" + txnId + ")"); List<Path> manifests = new ArrayList<>(); if (fs.exists(manifestDir)) { FileStatus[] manifestFiles = fs.listStatus(manifestDir); @@ -4152,21 +4176,21 @@ public final class Utilities { for (FileStatus status : manifestFiles) { Path path = status.getPath(); if (path.getName().endsWith(MANIFEST_EXTENSION)) { - Utilities.LOG14535.info("Reading manifest " + path); + Utilities.FILE_OP_LOGGER.info("Reading manifest " + path); manifests.add(path); } } } } else { - Utilities.LOG14535.info("No manifests found - query produced no output"); + Utilities.FILE_OP_LOGGER.info("No manifests found - query produced no output"); manifestDir = null; } - Utilities.LOG14535.info("Looking for files in: " + specPath); + Utilities.FILE_OP_LOGGER.debug("Looking for files in: " + specPath); JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); if (isMmCtas && !fs.exists(specPath)) { // TODO: do we also need to do this when creating an empty partition from select? - Utilities.LOG14535.info("Creating table directory for CTAS with no output at " + specPath); + Utilities.FILE_OP_LOGGER.info("Creating table directory for CTAS with no output at " + specPath); FileUtils.mkdir(fs, specPath, hconf); } Path[] files = getMmDirectoryCandidates( @@ -4174,7 +4198,9 @@ public final class Utilities { ArrayList<Path> mmDirectories = new ArrayList<>(); if (files != null) { for (Path path : files) { - Utilities.LOG14535.info("Looking at path: " + path); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Looking at path: " + path); + } mmDirectories.add(path); } } @@ -4193,14 +4219,14 @@ public final class Utilities { } if (manifestDir != null) { - Utilities.LOG14535.info("Deleting manifest directory " + manifestDir); + Utilities.FILE_OP_LOGGER.info("Deleting manifest directory " + manifestDir); tryDelete(fs, manifestDir); if (unionSuffix != null) { // Also delete the parent directory if we are the last union FSOP to execute. manifestDir = manifestDir.getParent(); FileStatus[] remainingFiles = fs.listStatus(manifestDir); if (remainingFiles == null || remainingFiles.length == 0) { - Utilities.LOG14535.info("Deleting manifest directory " + manifestDir); + Utilities.FILE_OP_LOGGER.info("Deleting manifest directory " + manifestDir); tryDelete(fs, manifestDir); } } @@ -4257,15 +4283,17 @@ public final class Utilities { // Found the right union directory; treat it as "our" MM directory. 
cleanMmDirectory(childPath, fs, null, committed); } else { - Utilities.LOG14535.info("FSOP for " + unionSuffix + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("FSOP for " + unionSuffix + " is ignoring the other side of the union " + childPath.getName()); + } } } } private static void deleteUncommitedFile(Path childPath, FileSystem fs) throws IOException, HiveException { - Utilities.LOG14535.info("Deleting " + childPath + " that was not committed"); + Utilities.FILE_OP_LOGGER.info("Deleting " + childPath + " that was not committed"); // We should actually succeed here - if we fail, don't commit the query. if (!fs.delete(childPath, true)) { throw new HiveException("Failed to delete an uncommitted path " + childPath); @@ -4278,7 +4306,9 @@ public final class Utilities { */ public static List<Path> getValidMmDirectoriesFromTableOrPart(Path path, Configuration conf, ValidTxnList validTxnList, int lbLevels) throws IOException { - Utilities.LOG14535.info("Looking for valid MM paths under " + path); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Looking for valid MM paths under " + path); + } // NULL means this directory is entirely valid. List<Path> result = null; FileSystem fs = path.getFileSystem(conf); @@ -4289,7 +4319,7 @@ public final class Utilities { Path childPath = file.getPath(); Long txnId = JavaUtils.extractTxnId(childPath); if (!file.isDirectory() || txnId == null || !validTxnList.isTxnValid(txnId)) { - Utilities.LOG14535.info("Skipping path " + childPath); + Utilities.FILE_OP_LOGGER.debug("Skipping path " + childPath); if (result == null) { result = new ArrayList<>(children.length - 1); for (int j = 0; j < i; ++j) { http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java index 6f2aaf0..93de69f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java @@ -121,14 +121,12 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ isMmNonMerge = !isMerge && MetaStoreUtils.isInsertOnlyTable(tbl.getProperties()); } else { // This would be the case for obscure tasks like truncate column (unsupported for MM). 
- Utilities.LOG14535.warn("Assuming not insert-only; no table in partition spec " + part); + Utilities.FILE_OP_LOGGER.warn("Assuming not insert-only; no table in partition spec " + part); } if (isAvoidSplitCombine || isMmNonMerge) { - //if (LOG.isDebugEnabled()) { - Utilities.LOG14535.info("The path [" + paths[i + start] + - "] is being parked for HiveInputFormat.getSplits"); - //} + Utilities.FILE_OP_LOGGER.info("The path [" + paths[i + start] + + "] is being parked for HiveInputFormat.getSplits"); nonCombinablePathIndices.add(i + start); } } http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index c3b9190..fba5b7e 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -520,7 +520,9 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> private static void processForWriteIds(Path dir, JobConf conf, ValidTxnList validTxnList, List<Path> finalPaths) throws IOException { FileSystem fs = dir.getFileSystem(conf); - Utilities.LOG14535.warn("Checking " + dir + " (root) for inputs"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Checking " + dir + " (root) for inputs"); + } // Ignore nullscan-optimized paths. if (fs instanceof NullScanFileSystem) { finalPaths.add(dir); @@ -537,20 +539,20 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> boolean hadAcidState = false; // whether getAcidState has been called for currDir for (FileStatus file : files) { Path path = file.getPath(); - Utilities.LOG14535.warn("Checking " + path + " for inputs"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Checking " + path + " for inputs"); + } if (!file.isDirectory()) { - Utilities.LOG14535.warn("Ignoring a file not in MM directory " + path); + Utilities.FILE_OP_LOGGER.warn("Ignoring a file not in MM directory " + path); } else if (JavaUtils.extractTxnId(path) == null) { subdirs.add(path); - } else { - if (!hadAcidState) { - AcidUtils.Directory dirInfo = AcidUtils.getAcidState(currDir, conf, validTxnList, Ref.from(false), true, null); - hadAcidState = true; - // todo for IOW, we also need to count in base dir, if any - for (AcidUtils.ParsedDelta delta : dirInfo.getCurrentDirectories()) { - Utilities.LOG14535.info("Adding input " + delta.getPath()); - finalPaths.add(delta.getPath()); - } + } else if (!hadAcidState) { + AcidUtils.Directory dirInfo = AcidUtils.getAcidState(currDir, conf, validTxnList, Ref.from(false), true, null); + hadAcidState = true; + // TODO: for IOW, we also need to count in base dir, if any + for (AcidUtils.ParsedDelta delta : dirInfo.getCurrentDirectories()) { + Utilities.FILE_OP_LOGGER.debug("Adding input " + delta.getPath()); + finalPaths.add(delta.getPath()); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 39808b2..d4282ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1724,12 +1724,17 @@ public class Hive { // TODO: this assumes both paths are qualified; which they are, currently. if (isMmTableWrite && loadPath.equals(newPartPath)) { // MM insert query, move itself is a no-op. - Utilities.LOG14535.info("not moving " + loadPath + " to " + newPartPath + " (MM)"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("not moving " + loadPath + " to " + newPartPath + " (MM)"); + } assert !isAcid; if (areEventsForDmlNeeded(tbl, oldPart)) { newFiles = listFilesCreatedByQuery(loadPath, txnId, stmtId); } - Utilities.LOG14535.info("maybe deleting stuff from " + oldPartPath + " (new " + newPartPath + ") for replace"); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("maybe deleting stuff from " + oldPartPath + + " (new " + newPartPath + ") for replace"); + } if (replace && oldPartPath != null) { boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); deleteOldPathForReplace(newPartPath, oldPartPath, getConf(), isAutoPurge, @@ -1745,7 +1750,9 @@ public class Hive { destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); filter = replace ? new JavaUtils.IdPathFilter(txnId, stmtId, false, true) : filter; } - Utilities.LOG14535.info("moving " + loadPath + " to " + destPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("moving " + loadPath + " to " + destPath); + } if (replace || (oldPart == null && !isAcid)) { boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), @@ -1916,7 +1923,9 @@ private void walkDirTree(FileStatus fSta, FileSystem fSys, /* Base Case. It's leaf. */ if (!fSta.isDir()) { - Utilities.LOG14535.info("Processing LB leaf " + fSta.getPath()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Processing LB leaf " + fSta.getPath()); + } /* construct one location map if not exists. */ constructOneLBLocationMap(fSta, skewedColValueLocationMaps, newPartPath, skewedInfo); return; @@ -1925,7 +1934,9 @@ private void walkDirTree(FileStatus fSta, FileSystem fSys, /* dfs. 
*/ FileStatus[] children = fSys.listStatus(fSta.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); if (children != null) { - Utilities.LOG14535.info("Processing LB dir " + fSta.getPath()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Processing LB dir " + fSta.getPath()); + } for (FileStatus child : children) { walkDirTree(child, fSys, skewedColValueLocationMaps, newPartPath, skewedInfo); } @@ -1974,8 +1985,10 @@ private void constructOneLBLocationMap(FileStatus fSta, for (int i = 0; i < (dirNames.length - dirsToTake); ++i) { lbdPath = lbdPath.getParent(); } - Utilities.LOG14535.info("Saving LB location " + lbdPath + " based on " + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Saving LB location " + lbdPath + " based on " + colCount + " keys and " + fSta.getPath()); + } if ((skewedValue.size() > 0) && (skewedValue.size() == colCount) && !skewedColValueLocationMaps.containsKey(skewedValue)) { skewedColValueLocationMaps.put(skewedValue, lbdPath.toString()); @@ -1993,7 +2006,6 @@ private void constructOneLBLocationMap(FileStatus fSta, */ private Map<List<String>, String> constructListBucketingLocationMap(Path newPartPath, SkewedInfo skewedInfo) throws IOException, FileNotFoundException { - Utilities.LOG14535.info("Constructing list bucketing map for " + newPartPath); Map<List<String>, String> skewedColValueLocationMaps = new HashMap<List<String>, String>(); FileSystem fSys = newPartPath.getFileSystem(conf); walkDirTree(fSys.getFileStatus(newPartPath), @@ -2021,7 +2033,6 @@ private void constructOneLBLocationMap(FileStatus fSta, throw new HiveException("partition " + s.getPath() + " is not a directory!"); } Path dpPath = s.getPath(); - Utilities.LOG14535.info("Found DP " + dpPath); validPartitions.add(dpPath); } } else { @@ -2034,7 +2045,9 @@ private void constructOneLBLocationMap(FileStatus fSta, for (int i = 0; i < numLB; ++i) { dpPath = dpPath.getParent(); // Now skip the LB directories, if any... } - Utilities.LOG14535.info("Found DP " + dpPath); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Found DP " + dpPath); + } validPartitions.add(dpPath); } } @@ -2111,7 +2124,7 @@ private void constructOneLBLocationMap(FileStatus fSta, final LinkedHashMap<String, String> fullPartSpec = Maps.newLinkedHashMap(partSpec); if (!Warehouse.makeSpecFromName( fullPartSpec, partPath, new HashSet<String>(partSpec.keySet()))) { - Utilities.LOG14535.warn("Ignoring invalid DP directory " + partPath); + Utilities.FILE_OP_LOGGER.warn("Ignoring invalid DP directory " + partPath); continue; } futures.add(pool.submit(new Callable<Void>() { @@ -2123,7 +2136,6 @@ private void constructOneLBLocationMap(FileStatus fSta, LOG.info("New loading path = " + partPath + " with partSpec " + fullPartSpec); // load the partition - Utilities.LOG14535.info("loadPartition called for DPP from " + partPath + " to " + tbl.getTableName()); Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace, true, numLB > 0, false, isAcid, hasFollowingStatsTask, txnId, stmtId); @@ -2227,7 +2239,7 @@ private void constructOneLBLocationMap(FileStatus fSta, } // TODO: this assumes both paths are qualified; which they are, currently. 
if (isMmTable && loadPath.equals(tbl.getPath())) { - Utilities.LOG14535.info("not moving " + loadPath + " to " + tbl.getPath()); + Utilities.FILE_OP_LOGGER.debug("not moving " + loadPath + " to " + tbl.getPath()); if (replace) { Path tableDest = tbl.getPath(); boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); @@ -2245,7 +2257,7 @@ private void constructOneLBLocationMap(FileStatus fSta, destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); filter = replace ? new JavaUtils.IdPathFilter(txnId, stmtId, false, true) : filter; } - Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath + " (replace = " + replace + ")"); + Utilities.FILE_OP_LOGGER.debug("moving " + loadPath + " to " + tblPath + " (replace = " + replace + ")"); if (replace) { boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); replaceFiles(tblPath, loadPath, destPath, tblPath, @@ -3431,7 +3443,9 @@ private void constructOneLBLocationMap(FileStatus fSta, Executors.newFixedThreadPool(conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build()) : null; if (destIsSubDirOfSrc && !destFs.exists(destf)) { - Utilities.LOG14535.info("Creating " + destf); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Creating " + destf); + } destFs.mkdirs(destf); } /* Move files one by one because source is a subdirectory of destination */ @@ -3831,7 +3845,8 @@ private void constructOneLBLocationMap(FileStatus fSta, private void deleteOldPathForReplace(Path destPath, Path oldPath, HiveConf conf, boolean purge, PathFilter pathFilter, boolean isMmTable, int lbLevels) throws HiveException { - Utilities.LOG14535.info("Deleting old paths for replace in " + destPath + " and old path " + oldPath); + Utilities.FILE_OP_LOGGER.debug("Deleting old paths for replace in " + destPath + + " and old path " + oldPath); boolean isOldPathUnderDestf = false; try { FileSystem oldFs = oldPath.getFileSystem(conf); @@ -3860,7 +3875,7 @@ private void constructOneLBLocationMap(FileStatus fSta, if (!lbDir.isDirectory()) { throw new HiveException("Unexpected path during overwrite: " + lbPath); } - Utilities.LOG14535.info("Cleaning up LB directory " + lbPath); + Utilities.FILE_OP_LOGGER.info("Cleaning up LB directory " + lbPath); cleanUpOneDirectoryForReplace(lbPath, oldFs, pathFilter, conf, purge); } } @@ -3882,11 +3897,13 @@ private void constructOneLBLocationMap(FileStatus fSta, PathFilter pathFilter, HiveConf conf, boolean purge) throws IOException, HiveException { FileStatus[] statuses = fs.listStatus(path, pathFilter); if (statuses == null || statuses.length == 0) return; - String s = "Deleting files under " + path + " for replace: "; - for (FileStatus file : statuses) { - s += file.getPath().getName() + ", "; + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + String s = "Deleting files under " + path + " for replace: "; + for (FileStatus file : statuses) { + s += file.getPath().getName() + ", "; + } + Utilities.FILE_OP_LOGGER.trace(s); } - Utilities.LOG14535.info(s); if (!trashFiles(fs, statuses, conf, purge)) { throw new HiveException("Old path " + path + " has not been cleaned up."); } http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ---------------------------------------------------------------------- diff --git 
http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
index 41bbdfa..3c25896 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
@@ -407,7 +407,6 @@ class TextMetaDataFormatter implements MetaDataFormatter {
  }

  private void processDir(FileStatus status, FileSystem fs, FileData fd) throws IOException {
-   Utilities.LOG14535.info("Processing dir for status: " + status.getPath());
    long accessTime = status.getAccessTime();
    long updateTime = status.getModificationTime();
    if (accessTime > fd.lastAccessTime) {

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
index 02bdd91..66ea4e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
@@ -265,7 +265,7 @@ abstract public class AbstractBucketJoinProc implements NodeProcessor {
    Table tbl = tso.getConf().getTableMetadata();
    if (MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) {
-     Utilities.LOG14535.debug("No bucketed join on MM table " + tbl.getTableName());
+     Utilities.FILE_OP_LOGGER.debug("No bucketed join on MM table " + tbl.getTableName());
      return false;
    }
    if (tbl.isPartitioned()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 6a56407..9ffda7e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1266,9 +1266,11 @@ public final class GenMapRedUtils {
    // 1. create the operator tree
    //
    FileSinkDesc fsInputDesc = fsInput.getConf();
-   Utilities.LOG14535.info("Creating merge work from " + System.identityHashCode(fsInput)
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("Creating merge work from " + System.identityHashCode(fsInput)
        + " with write ID " + (fsInputDesc.isMmTable() ? fsInputDesc.getTransactionId() : null)
        + " into " + finalName);
+   }

    boolean isBlockMerge = (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) &&
        fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) ||
@@ -1367,12 +1369,11 @@ public final class GenMapRedUtils {
    MoveWork dummyMv = null;
    if (srcMmWriteId == null) {
      // Only create the movework for non-MM table. No action needed for a MM table.
-     Utilities.LOG14535.info("creating dummy movetask for merge (with lfd)");
      dummyMv = new MoveWork(null, null, null,
          new LoadFileDesc(inputDirName, finalName, true, null, null, false), false,
          SessionState.get().getLineageState());
    } else {
-     // TODO# create the noop MoveWork to avoid q file changes for now. Should be removed w/the flag just before merge
+     // TODO# noop MoveWork to avoid q file changes in HIVE-14990. Remove (w/the flag) after merge.
      dummyMv = new MoveWork(null, null, null,
          new LoadFileDesc(inputDirName, finalName, true, null, null, false), false,
          SessionState.get().getLineageState());
@@ -1383,7 +1384,6 @@ public final class GenMapRedUtils {
    // can only be triggered for a merge that's part of insert for now; MM tables do not support
    // concatenate. Keeping the old logic for non-MM tables with temp directories and stuff.
    Path fsopPath = srcMmWriteId != null ? fsInputDesc.getFinalDirName() : finalName;
-   Utilities.LOG14535.info("Looking for MoveTask to make it dependant on the conditional tasks");
    Task<MoveWork> mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(
        mvTasks, fsopPath, fsInputDesc.isMmTable());
@@ -1565,7 +1565,6 @@ public final class GenMapRedUtils {
    TableDesc tblDesc = fsDesc.getTableInfo();
    aliases.add(inputDirStr); // dummy alias: just use the input path
-   Utilities.LOG14535.info("createMRWorkForMergingFiles for " + inputDir);
    // constructing the default MapredWork
    MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
    MapWork cplan = cMrPlan.getMapWork();
@@ -1616,7 +1615,9 @@ public final class GenMapRedUtils {
          + " format other than RCFile or ORCFile");
    }
-   Utilities.LOG14535.info("creating mergefilework from " + inputDirs + " to " + finalName);
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("creating mergefilework from " + inputDirs + " to " + finalName);
+   }
    // create the merge file work
    MergeFileWork work = new MergeFileWork(inputDirs, finalName,
        hasDynamicPartitions, tblDesc.getInputFileFormatClass().getName(), tblDesc);
@@ -1749,7 +1750,9 @@ public final class GenMapRedUtils {
      Task<? extends Serializable> currTask, MoveWork mvWork, Serializable mergeWork,
      Path condInputPath, Path condOutputPath, Task<MoveWork> moveTaskToLink,
      DependencyCollectionTask dependencyTask) {
-   Utilities.LOG14535.info("Creating conditional merge task for " + condInputPath);
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("Creating conditional merge task for " + condInputPath);
+   }
    // Create a dummy task if no move is needed.
    Serializable moveWork = mvWork != null ? mvWork : new DependencyCollectionWork();
@@ -1849,9 +1852,11 @@ public final class GenMapRedUtils {
    } else if (mvWork.getLoadTableWork() != null) {
      srcDir = mvWork.getLoadTableWork().getSourcePath();
    }
-   Utilities.LOG14535.info("Observing MoveWork " + System.identityHashCode(mvWork)
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("Observing MoveWork " + System.identityHashCode(mvWork)
        + " with " + srcDir + "(from " + (isLfd ? "LFD" : "LTD") + ") while looking for "
        + fsopFinalDir + "(mm = " + isMmFsop + ")");
+   }

    if ((srcDir != null) && srcDir.equals(fsopFinalDir)) {
      return mvTsk;
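For single-statement messages like the ones in these hunks, SLF4J's parameterized form defers formatting without an explicit guard. Assuming FILE_OP_LOGGER is an SLF4J logger, as its isTraceEnabled()/trace() usage suggests, a guard-free equivalent would be:

    // {} placeholders are substituted only after the level check passes, so no
    // string is built when trace is off. An explicit isTraceEnabled() guard is
    // still the right tool when the message is assembled in a loop.
    Utilities.FILE_OP_LOGGER.trace("Creating conditional merge task for {}", condInputPath);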
"LFD" : "LTD") + ") while looking for " + fsopFinalDir + "(mm = " + isMmFsop + ")"); + } if ((srcDir != null) && srcDir.equals(fsopFinalDir)) { return mvTsk; @@ -1958,11 +1963,17 @@ public final class GenMapRedUtils { if (fileSinkDesc.isLinkedFileSink()) { for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) { fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName())); - Utilities.LOG14535.info("createMoveTask setting tmpDir for LinkedFileSink chDir " + fsConf.getDirName() + "; dest was " + fileSinkDesc.getDestPath()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir for LinkedFileSink chDir " + + fsConf.getDirName() + "; dest was " + fileSinkDesc.getDestPath()); + } } } else { fileSinkDesc.setDirName(tmpDir); - Utilities.LOG14535.info("createMoveTask setting tmpDir chDir " + tmpDir + "; dest was " + fileSinkDesc.getDestPath()); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir chDir " + + tmpDir + "; dest was " + fileSinkDesc.getDestPath()); + } } } } @@ -1970,7 +1981,6 @@ public final class GenMapRedUtils { Task<MoveWork> mvTask = null; if (!chDir) { - Utilities.LOG14535.info("Looking for MoveTask from createMoveTask"); mvTask = GenMapRedUtils.findMoveTaskForFsopOutput( mvTasks, fsOp.getConf().getFinalDirName(), fsOp.getConf().isMmTable()); } http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java index bc26c5e..3f38bd1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java @@ -223,7 +223,10 @@ public final class UnionProcFactory { FileSinkDesc fileSinkDesc = (FileSinkDesc) fileSinkOp.getConf().clone(); fileSinkDesc.setDirName(new Path(parentDirName, AbstractFileMergeOperator.UNION_SUDBIR_PREFIX + parent.getIdentifier())); fileSinkDesc.setLinkedFileSink(true); - Utilities.LOG14535.info("Created LinkedFileSink for union " + fileSinkDesc.getDirName() + "; parent " + parentDirName); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { + Utilities.FILE_OP_LOGGER.trace("Created LinkedFileSink for union " + fileSinkDesc.getDirName() + + "; parent " + parentDirName); + } parent.setChildOperators(null); Operator<? 
http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index cf1d14c..01cb2b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -316,7 +316,8 @@ public class GenTezUtils {
        linked.add(desc);

        desc.setDirName(new Path(path, AbstractFileMergeOperator.UNION_SUDBIR_PREFIX + linked.size()));
-       Utilities.LOG14535.info("removing union - new desc with " + desc.getDirName() + "; parent " + path);
+       Utilities.FILE_OP_LOGGER.debug("removing union - new desc with "
+           + desc.getDirName() + "; parent " + path);
        desc.setLinkedFileSink(true);
        desc.setLinkedFileSinkDesc(linked);
      }
@@ -384,7 +385,8 @@ public class GenTezUtils {
      // If underlying data is RCFile or OrcFile, RCFileBlockMerge task or
      // OrcFileStripeMerge task would be created.
      LOG.info("using CombineHiveInputformat for the merge job");
-     Utilities.LOG14535.info("will generate MR work for merging files from " + fileSink.getConf().getDirName() + " to " + finalName);
+     Utilities.FILE_OP_LOGGER.debug("will generate MR work for merging files from "
+         + fileSink.getConf().getDirName() + " to " + finalName);
      GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName,
          context.dependencyTask, context.moveTask, hconf, context.currentTask);

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index b9f28eb..41b5156 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -367,9 +367,11 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
    Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
    Path destPath = !MetaStoreUtils.isInsertOnlyTable(table.getParameters()) ? x.getCtx().getExternalTmpPath(tgtPath)
        : new Path(tgtPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId));
-   Utilities.LOG14535.info("adding import work for table with source location: "
-       + dataPath + "; table: " + tgtPath + "; copy destination " + destPath + "; mm "
-       + txnId + " (src " + isSourceMm + ") for " + (table == null ? "a new table" : table.getTableName()));
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("adding import work for table with source location: "
+         + dataPath + "; table: " + tgtPath + "; copy destination " + destPath + "; mm " + txnId
+         + " (src " + isSourceMm + ") for " + (table == null ? "a new table" : table.getTableName()));
+   }

    Task<?> copyTask = null;
    if (replicationSpec.isInReplicationScope()) {
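Both import hunks choose the copy destination the same way: a regular table stages through an external scratch path and is moved into place, while an insert-only (MM) table writes straight into a per-transaction delta directory under the target location. The decision, condensed from the hunk above with its own names (the directory name produced by AcidUtils.deltaSubdir has the shape delta_<txnId>_<txnId>_<stmtId>; the exact padding is not shown in this patch):

    Path destPath = !MetaStoreUtils.isInsertOnlyTable(table.getParameters())
        ? x.getCtx().getExternalTmpPath(tgtPath)                          // staged copy, then move
        : new Path(tgtPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); // direct MM write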
@@ -458,9 +460,11 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
    Path destPath = !MetaStoreUtils.isInsertOnlyTable(table.getParameters()) ? x.getCtx().getExternalTmpPath(tgtLocation)
        : new Path(tgtLocation, AcidUtils.deltaSubdir(txnId, txnId, stmtId));
    Path moveTaskSrc = !MetaStoreUtils.isInsertOnlyTable(table.getParameters()) ? destPath : tgtLocation;
-   Utilities.LOG14535.info("adding import work for partition with source location: "
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("adding import work for partition with source location: "
        + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm "
        + txnId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec()));
+   }

    Task<?> copyTask = null;
@@ -484,9 +488,6 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
      loadTableWork.setTxnId(txnId);
      loadTableWork.setStmtId(stmtId);
      loadTableWork.setInheritTableSpecs(false);
-     // Do not commit the write ID from each task; need to commit once.
-     // TODO: we should just change the import to use a single MoveTask, like dynparts.
-     loadTableWork.setIntermediateInMmWrite(isAcid(txnId));
      Task<?> loadPartTask = TaskFactory.get(new MoveWork(
          x.getInputs(), x.getOutputs(), loadTableWork, null, false,
          SessionState.get().getLineageState()), x.getConf());
      copyTask.addDependentTask(loadPartTask);

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index df50aab..29d6520 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6834,7 +6834,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      } else {
        queryTmpdir = ctx.getTempDirForPath(dest_path, true);
      }
-     Utilities.LOG14535.info("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + dest_path);
+     if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+       Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir
+           + " from " + dest_path);
+     }
      if (dpCtx != null) {
        // set the root of the temporary path where dynamic partition columns will populate
        dpCtx.setRootPath(queryTmpdir);
@@ -6914,7 +6917,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      isMmTable = MetaStoreUtils.isInsertOnlyTable(dest_tab.getParameters());
      queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForPath(dest_path, true);
-     Utilities.LOG14535.info("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + dest_path);
+     if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+       Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying "
+           + queryTmpdir + " from " + dest_path);
+     }
      table_desc = Utilities.getTableDesc(dest_tab);

      // Add sorting/bucketing if needed
@@ -6996,7 +7002,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      try {
        Path qPath = FileUtils.makeQualified(dest_path, conf);
        queryTmpdir = isMmTable ? qPath : ctx.getTempDirForPath(qPath, true);
-       Utilities.LOG14535.info("Setting query directory " + queryTmpdir + " from " + dest_path + " (" + isMmTable + ")");
+       if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+         Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir
+             + " from " + dest_path + " (" + isMmTable + ")");
+       }
      } catch (Exception e) {
        throw new SemanticException("Error creating temporary folder on: "
            + dest_path, e);

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 752a934..a63f709 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -342,9 +342,14 @@
      dataSinkForCtas.setDirName(location);
      location = new Path(location, AcidUtils.deltaSubdir(txnIdForCtas, txnIdForCtas, stmtId));
      lfd.setSourcePath(location);
-     Utilities.LOG14535.info("Setting MM CTAS to " + location);
+     if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+       Utilities.FILE_OP_LOGGER.trace("Setting MM CTAS to " + location);
+     }
    }
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("Location for LFD is being set to "
+         + location + "; moving from " + lfd.getSourcePath());
+   }
-   Utilities.LOG14535.info("Location for LFD is being set to " + location + "; moving from " + lfd.getSourcePath());
    lfd.setTargetDir(location);
  }

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index a0e024c..129347b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -171,7 +171,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
        if(lbLevel == 0) {
          // static partition without list bucketing
          long totalSz = getMergeSize(inpFs, dirPath, avgConditionSize);
-         Utilities.LOG14535.info("merge resolve simple case - totalSz " + totalSz + " from " + dirPath);
+         Utilities.FILE_OP_LOGGER.debug("merge resolve simple case - totalSz " + totalSz + " from " + dirPath);
          if (totalSz >= 0) { // add the merge job
            setupMapRedWork(conf, work, trgtSize, totalSz);
@@ -186,7 +186,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
          }
        }
      } else {
-       Utilities.LOG14535.info("Resolver returning movetask for " + dirPath);
+       Utilities.FILE_OP_LOGGER.info("Resolver returning movetask for " + dirPath);
        resTsks.add(mvTask);
      }
    } catch (IOException e) {
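The resolver hunks all feed one decision: getMergeSize returns a non-negative total size only when the directory's average file size falls below the configured threshold, and only then is a merge job scheduled; otherwise the plain move task runs. A condensed sketch of that control flow for the simple static-partition case, reusing the hunk's names:

    long totalSz = getMergeSize(inpFs, dirPath, avgConditionSize);
    if (totalSz >= 0) {
      // Small files on average: size the merge MapReduce job and add it.
      setupMapRedWork(conf, work, trgtSize, totalSz);
      resTsks.add(mrTask);
    } else {
      // Nothing worth merging: just move the data into place.
      resTsks.add(mvTask);
    }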
@@ -229,7 +228,6 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
      Task<? extends Serializable> mrTask, Task<? extends Serializable> mrAndMvTask, Path dirPath,
      FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapWork work, int dpLbLevel)
      throws IOException {
-   Utilities.LOG14535.info("generateActualTasks for " + dirPath);
    DynamicPartitionCtx dpCtx = ctx.getDPCtx();
    // get list of dynamic partitions
    FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, dpLbLevel, inpFs);
@@ -240,7 +239,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
      Path path = ptpi.keySet().iterator().next();
      PartitionDesc partDesc = ptpi.get(path);
      TableDesc tblDesc = partDesc.getTableDesc();
-     Utilities.LOG14535.info("merge resolver removing " + path);
+     Utilities.FILE_OP_LOGGER.debug("merge resolver removing " + path);
      work.removePathToPartitionInfo(path); // the root path is not useful anymore

      // cleanup pathToAliases
@@ -263,14 +262,14 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
        PartitionDesc pDesc = (dpCtx != null) ? generateDPFullPartSpec(dpCtx, status, tblDesc, i)
            : partDesc;
        if (pDesc == null) {
-         Utilities.LOG14535.warn("merger ignoring invalid DP path " + status[i].getPath());
+         Utilities.FILE_OP_LOGGER.warn("merger ignoring invalid DP path " + status[i].getPath());
          continue;
        }
-       Utilities.LOG14535.info("merge resolver will merge " + status[i].getPath());
+       Utilities.FILE_OP_LOGGER.debug("merge resolver will merge " + status[i].getPath());
        work.resolveDynamicPartitionStoredAsSubDirsMerge(conf, status[i].getPath(),
            tblDesc, aliases, pDesc);
      } else {
-       Utilities.LOG14535.info("merge resolver will move " + status[i].getPath());
+       Utilities.FILE_OP_LOGGER.debug("merge resolver will move " + status[i].getPath());
        toMove.add(status[i].getPath());
      }
@@ -375,7 +374,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
    long totalSz = 0;
    int numFiles = 0;
    for (FileStatus fStat : fStats) {
-     Utilities.LOG14535.info("Resolver looking at " + fStat.getPath());
+     Utilities.FILE_OP_LOGGER.debug("Resolver looking at " + fStat.getPath());
      if (fStat.isDir()) {
        AverageSize avgSzDir = getAverageSize(inpFs, fStat.getPath());
        if (avgSzDir.getTotalSize() < 0) {

http://git-wip-us.apache.org/repos/asf/hive/blob/55d9ceee/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
index d90acce..30d9912 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
@@ -73,7 +73,9 @@ public class LoadFileDesc extends LoadDesc implements Serializable {
      final boolean isDfsDir, final String columns, final String columnTypes,
      AcidUtils.Operation writeType, boolean isMmCtas) {
    super(sourcePath, writeType);
-   Utilities.LOG14535.info("creating LFD from " + sourcePath + " to " + targetDir);
+   if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
+     Utilities.FILE_OP_LOGGER.trace("creating LFD from " + sourcePath + " to " + targetDir);
+   }
    this.targetDir = targetDir;
    this.isDfsDir = isDfsDir;
    this.columns = columns;
