Repository: hive Updated Branches: refs/heads/llap e073cce23 -> bc75d72b8
HIVE-13661: [Refactor] Move common FS operations out of shim layer (Ashutosh Chauhan via Sergey Shelukhin) Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e1b03837 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e1b03837 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e1b03837 Branch: refs/heads/llap Commit: e1b03837749c538ea2da7a1dddbedc2ead49e0ee Parents: e9a7218 Author: Ashutosh Chauhan <[email protected]> Authored: Sat Apr 30 18:04:37 2016 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Mon May 2 18:16:19 2016 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/common/FileUtils.java | 19 +-- .../hive/metastore/HiveMetaStoreFsImpl.java | 11 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 20 +-- .../apache/hadoop/hive/ql/exec/MoveTask.java | 7 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 27 +--- .../hadoop/hive/ql/session/SessionState.java | 2 +- .../apache/hadoop/hive/shims/Hadoop23Shims.java | 148 ------------------ .../org/apache/hadoop/hive/io/HdfsUtils.java | 156 ++++++++++++++++--- .../apache/hadoop/hive/shims/HadoopShims.java | 41 ----- .../hadoop/hive/shims/HadoopShimsSecure.java | 10 -- 10 files changed, 164 insertions(+), 277 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/common/src/java/org/apache/hadoop/hive/common/FileUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index f7d41cd..71c9188 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -36,10 +36,11 @@ import org.apache.hadoop.fs.FileUtil; 
import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.Trash; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; @@ -526,11 +527,9 @@ public final class FileUtils { if (!success) { return false; } else { - HadoopShims shim = ShimLoader.getHadoopShims(); - HdfsFileStatus fullFileStatus = shim.getFullFileStatus(conf, fs, lastExistingParent); try { //set on the entire subtree - shim.setFullFileStatus(conf, fullFileStatus, fs, firstNonExistentParent); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, lastExistingParent), fs, firstNonExistentParent); } catch (Exception e) { LOG.warn("Error setting permissions of " + firstNonExistentParent, e); } @@ -566,9 +565,8 @@ public final class FileUtils { boolean inheritPerms = conf.getBoolVar(HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS); if (copied && inheritPerms) { - HdfsFileStatus fullFileStatus = shims.getFullFileStatus(conf, dstFS, dst); try { - shims.setFullFileStatus(conf, fullFileStatus, dstFS, dst); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, dstFS, dst.getParent()), dstFS, dst); } catch (Exception e) { LOG.warn("Error setting permissions or group of " + dst, e); } @@ -620,12 +618,11 @@ public final class FileUtils { */ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete) throws IOException { - LOG.info("deleting " + f); - HadoopShims hadoopShim = ShimLoader.getHadoopShims(); + LOG.debug("deleting " + f); boolean result = false; try { - result = 
hadoopShim.moveToAppropriateTrash(fs, f, conf); + result = Trash.moveToAppropriateTrash(fs, f, conf); if (result) { LOG.info("Moved to trash: " + f); return true; @@ -687,10 +684,8 @@ public final class FileUtils { } else { //rename the directory if (fs.rename(sourcePath, destPath)) { - HadoopShims shims = ShimLoader.getHadoopShims(); - HdfsFileStatus fullFileStatus = shims.getFullFileStatus(conf, fs, destPath.getParent()); try { - shims.setFullFileStatus(conf, fullFileStatus, fs, destPath); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, destPath.getParent()), fs, destPath); } catch (Exception e) { LOG.warn("Error setting permissions or group of " + destPath, e); } http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java index 9e7dcfc..df698c8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreFsImpl.java @@ -25,10 +25,8 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.fs.Trash; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; public class HiveMetaStoreFsImpl implements MetaStoreFS { @@ -38,19 +36,18 @@ public class HiveMetaStoreFsImpl implements MetaStoreFS { @Override public boolean deleteDir(FileSystem fs, Path f, boolean recursive, boolean ifPurge, Configuration conf) throws MetaException { - LOG.info("deleting " + f); - 
HadoopShims hadoopShim = ShimLoader.getHadoopShims(); + LOG.debug("deleting " + f); try { if (ifPurge) { LOG.info("Not moving "+ f +" to trash"); - } else if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) { + } else if (Trash.moveToAppropriateTrash(fs, f, conf)) { LOG.info("Moved to trash: " + f); return true; } if (fs.delete(f, true)) { - LOG.info("Deleted the diretory " + f); + LOG.debug("Deleted the diretory " + f); return true; } http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 39a1efc..5c20caa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.PartitionDropOptions; import org.apache.hadoop.hive.metastore.TableType; @@ -216,9 +217,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.tools.HadoopArchives; @@ -2394,7 +2392,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { /** 
* Write a list of the user defined functions to a file. - * @param db + * @param db * * @param showFuncs * are the functions we're interested in. @@ -2447,7 +2445,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { /** * Write a list of the current locks to a file. - * @param db + * @param db * * @param showLocks * the locks we're interested in. @@ -2725,7 +2723,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { /** * Lock the table/partition specified - * @param db + * @param db * * @param lockTbl * the table/partition to be locked along with the mode @@ -2771,7 +2769,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { /** * Unlock the table/partition specified - * @param db + * @param db * * @param unlockTbl * the table/partition to be unlocked @@ -2787,7 +2785,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { /** * Shows a description of a function. - * @param db + * @param db * * @param descFunc * is the function we are describing @@ -4190,15 +4188,13 @@ public class DDLTask extends Task<DDLWork> implements Serializable { try { // this is not transactional - HadoopShims shim = ShimLoader.getHadoopShims(); for (Path location : getLocations(db, table, partSpec)) { FileSystem fs = location.getFileSystem(conf); - - HdfsFileStatus fullFileStatus = shim.getFullFileStatus(conf, fs, location); + HdfsUtils.HadoopFileStatus status = new HdfsUtils.HadoopFileStatus(conf, fs, location); fs.delete(location, true); fs.mkdirs(location); try { - shim.setFullFileStatus(conf, fullFileStatus, fs, location); + HdfsUtils.setFullFileStatus(conf, status, fs, location); } catch (Exception e) { LOG.warn("Error setting permissions of " + location, e); } http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 54592cc..c2c6c65 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; @@ -65,8 +66,6 @@ import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -179,11 +178,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { actualPath = actualPath.getParent(); } fs.mkdirs(mkDirPath); - HadoopShims shims = ShimLoader.getHadoopShims(); if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS)) { try { - HadoopShims.HdfsFileStatus status = shims.getFullFileStatus(conf, fs, actualPath); - shims.setFullFileStatus(conf, status, fs, actualPath); + HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, actualPath), fs, mkDirPath); } catch (Exception e) { LOG.warn("Error setting permissions or group of " + actualPath, e); } http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 4d9c3d2..26c458c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPri import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.metastore.HiveMetaException; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; @@ -2611,9 +2612,9 @@ private void constructOneLBLocationMap(FileStatus fSta, FileStatus[] srcs, final FileSystem srcFs, final Path destf, final boolean isSrcLocal, final List<Path> newFiles) throws HiveException { - final HadoopShims.HdfsFileStatus fullDestStatus; + final HdfsUtils.HadoopFileStatus fullDestStatus; try { - fullDestStatus = ShimLoader.getHadoopShims().getFullFileStatus(conf, destFs, destf); + fullDestStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf); } catch (IOException e1) { throw new HiveException(e1); } @@ -2674,7 +2675,7 @@ private void constructOneLBLocationMap(FileStatus fSta, } if (inheritPerms) { - ShimLoader.getHadoopShims().setFullFileStatus(conf, fullDestStatus, destFs, destPath); + HdfsUtils.setFullFileStatus(conf, fullDestStatus, destFs, destPath); } if (null != newFiles) { newFiles.add(destPath); @@ -2697,17 +2698,6 @@ private void constructOneLBLocationMap(FileStatus fSta, } } - private static boolean destExists(List<List<Path[]>> result, Path proposed) { - for (List<Path[]> sdpairs : result) { - for (Path[] sdpair : sdpairs) { - if (sdpair[1].equals(proposed)) { - return true; - } - } - } - return false; - } - private static boolean isSubDir(Path srcf, Path destf, 
FileSystem srcFs, FileSystem destFs, boolean isSrcLocal) { if (srcf == null) { LOG.debug("The source path is null for isSubDir method."); @@ -2795,8 +2785,7 @@ private void constructOneLBLocationMap(FileStatus fSta, //needed for perm inheritance. boolean inheritPerms = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS); - HadoopShims shims = ShimLoader.getHadoopShims(); - HadoopShims.HdfsFileStatus destStatus = null; + HdfsUtils.HadoopFileStatus destStatus = null; // If source path is a subdirectory of the destination path: // ex: INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE src.key >= 300; @@ -2808,7 +2797,7 @@ private void constructOneLBLocationMap(FileStatus fSta, try { if (inheritPerms || replace) { try{ - destStatus = shims.getFullFileStatus(conf, destFs, destf); + destStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf); //if destf is an existing directory: //if replace is true, delete followed by rename(mv) is equivalent to replace //if replace is false, rename (mv) actually move the src under dest dir @@ -2821,7 +2810,7 @@ private void constructOneLBLocationMap(FileStatus fSta, } catch (FileNotFoundException ignore) { //if dest dir does not exist, any re if (inheritPerms) { - destStatus = shims.getFullFileStatus(conf, destFs, destf.getParent()); + destStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf.getParent()); } } } @@ -2888,7 +2877,7 @@ private void constructOneLBLocationMap(FileStatus fSta, if (success && inheritPerms) { try { - ShimLoader.getHadoopShims().setFullFileStatus(conf, destStatus, destFs, destf); + HdfsUtils.setFullFileStatus(conf, destStatus, destFs, destf); } catch (IOException e) { LOG.warn("Error setting permission of file " + destf + ": "+ e.getMessage(), e); } http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ---------------------------------------------------------------------- diff 
--git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index d211eb2..37ef165 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -445,7 +445,7 @@ public class SessionState { if ("hdfs".equals(fs.getUri().getScheme())) { hdfsEncryptionShim = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, sessionConf); } else { - LOG.info("Could not get hdfsEncryptionShim, it is only applicable to hdfs filesystem."); + LOG.debug("Could not get hdfsEncryptionShim, it is only applicable to hdfs filesystem."); } } catch (Exception e) { throw new HiveException(e); http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index c21088f..64b8780 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -47,7 +47,6 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; @@ -55,10 +54,6 @@ import org.apache.hadoop.fs.ProxyFileSystem; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.Trash; import org.apache.hadoop.fs.TrashPolicy; -import org.apache.hadoop.fs.permission.AclEntry; -import org.apache.hadoop.fs.permission.AclEntryScope; -import org.apache.hadoop.fs.permission.AclEntryType; -import 
org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSClient; @@ -101,11 +96,6 @@ import org.apache.hadoop.util.Progressable; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.test.MiniTezCluster; -import com.google.common.base.Joiner; -import com.google.common.base.Objects; -import com.google.common.base.Predicate; -import com.google.common.collect.Iterables; - /** * Implemention of shims against Hadoop 0.23.0. */ @@ -257,10 +247,6 @@ public class Hadoop23Shims extends HadoopShimsSecure { return conf.get("yarn.resourcemanager.webapp.address"); } - protected boolean isExtendedAclEnabled(Configuration conf) { - return Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true"); - } - @Override public long getDefaultBlockSize(FileSystem fs, Path path) { return fs.getDefaultBlockSize(path); @@ -272,12 +258,6 @@ public class Hadoop23Shims extends HadoopShimsSecure { } @Override - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException { - return Trash.moveToAppropriateTrash(fs, path, conf); - } - - @Override public void setTotalOrderPartitionFile(JobConf jobConf, Path partitionFile){ TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); } @@ -808,134 +788,6 @@ public class Hadoop23Shims extends HadoopShimsSecure { stream.hflush(); } - @Override - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, - Path file) throws IOException { - FileStatus fileStatus = fs.getFileStatus(file); - AclStatus aclStatus = null; - if (isExtendedAclEnabled(conf)) { - //Attempt extended Acl operations only if its enabled, but don't fail the operation regardless. 
- try { - aclStatus = fs.getAclStatus(file); - } catch (Exception e) { - LOG.info("Skipping ACL inheritance: File system for path " + file + " " + - "does not support ACLs but dfs.namenode.acls.enabled is set to true. "); - LOG.debug("The details are: " + e, e); - } - } - return new Hadoop23FileStatus(fileStatus, aclStatus); - } - - @Override - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException { - String group = sourceStatus.getFileStatus().getGroup(); - //use FsShell to change group, permissions, and extended ACL's recursively - try { - FsShell fsShell = new FsShell(); - fsShell.setConf(conf); - //If there is no group of a file, no need to call chgrp - if (group != null && !group.isEmpty()) { - run(fsShell, new String[]{"-chgrp", "-R", group, target.toString()}); - } - - if (isExtendedAclEnabled(conf)) { - //Attempt extended Acl operations only if its enabled, 8791but don't fail the operation regardless. - try { - AclStatus aclStatus = ((Hadoop23FileStatus) sourceStatus).getAclStatus(); - if (aclStatus != null) { - List<AclEntry> aclEntries = aclStatus.getEntries(); - removeBaseAclEntries(aclEntries); - - //the ACL api's also expect the tradition user/group/other permission in the form of ACL - FsPermission sourcePerm = sourceStatus.getFileStatus().getPermission(); - aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.USER, sourcePerm.getUserAction())); - aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.GROUP, sourcePerm.getGroupAction())); - aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.OTHER, sourcePerm.getOtherAction())); - - //construct the -setfacl command - String aclEntry = Joiner.on(",").join(aclStatus.getEntries()); - run(fsShell, new String[]{"-setfacl", "-R", "--set", aclEntry, target.toString()}); - } - } catch (Exception e) { - LOG.info("Skipping ACL inheritance: File system for path " + target + " " + - "does not support ACLs but 
dfs.namenode.acls.enabled is set to true. "); - LOG.debug("The details are: " + e, e); - } - } else { - String permission = Integer.toString(sourceStatus.getFileStatus().getPermission().toShort(), 8); - run(fsShell, new String[]{"-chmod", "-R", permission, target.toString()}); - } - } catch (Exception e) { - throw new IOException("Unable to set permissions of " + target, e); - } - try { - if (LOG.isDebugEnabled()) { //some trace logging - getFullFileStatus(conf, fs, target).debugLog(); - } - } catch (Exception e) { - //ignore. - } - } - - public class Hadoop23FileStatus implements HdfsFileStatus { - private final FileStatus fileStatus; - private final AclStatus aclStatus; - public Hadoop23FileStatus(FileStatus fileStatus, AclStatus aclStatus) { - this.fileStatus = fileStatus; - this.aclStatus = aclStatus; - } - @Override - public FileStatus getFileStatus() { - return fileStatus; - } - public AclStatus getAclStatus() { - return aclStatus; - } - @Override - public void debugLog() { - if (fileStatus != null) { - LOG.debug(fileStatus.toString()); - } - if (aclStatus != null) { - LOG.debug(aclStatus.toString()); - } - } - } - - /** - * Create a new AclEntry with scope, type and permission (no name). - * - * @param scope - * AclEntryScope scope of the ACL entry - * @param type - * AclEntryType ACL entry type - * @param permission - * FsAction set of permissions in the ACL entry - * @return AclEntry new AclEntry - */ - private AclEntry newAclEntry(AclEntryScope scope, AclEntryType type, - FsAction permission) { - return new AclEntry.Builder().setScope(scope).setType(type) - .setPermission(permission).build(); - } - - /** - * Removes basic permission acls (unamed acls) from the list of acl entries - * @param entries acl entries to remove from. 
- */ - private void removeBaseAclEntries(List<AclEntry> entries) { - Iterables.removeIf(entries, new Predicate<AclEntry>() { - @Override - public boolean apply(AclEntry input) { - if (input.getName() == null) { - return true; - } - return false; - } - }); - } - class ProxyFileSystem23 extends ProxyFileSystem { public ProxyFileSystem23(FileSystem fs) { super(fs); http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java ---------------------------------------------------------------------- diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java index c90b34c..e931156 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java @@ -19,43 +19,155 @@ package org.apache.hadoop.hive.io; import java.io.IOException; +import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclEntryType; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class HdfsUtils { - private static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); - private static final Log LOG = 
LogFactory.getLog(HdfsUtils.class); +import com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; - public static long getFileId(FileSystem fileSystem, Path path) throws IOException { - String pathStr = path.toUri().getPath(); - if (fileSystem instanceof DistributedFileSystem) { - return SHIMS.getFileId(fileSystem, pathStr); - } - // If we are not on DFS, we just hash the file name + size and hope for the best. - // TODO: we assume it only happens in tests. Fix? - int nameHash = pathStr.hashCode(); - long fileSize = fileSystem.getFileStatus(path).getLen(); - long id = ((fileSize ^ (fileSize >>> 32)) << 32) | ((long)nameHash & 0xffffffffL); - LOG.warn("Cannot get unique file ID from " - + fileSystem.getClass().getSimpleName() + "; using " + id + "(" + pathStr - + "," + nameHash + "," + fileSize + ")"); - return id; - } +public class HdfsUtils { // TODO: this relies on HDFS not changing the format; we assume if we could get inode ID, this // is still going to work. Otherwise, file IDs can be turned off. Later, we should use // as public utility method in HDFS to obtain the inode-based path. private static String HDFS_ID_PATH_PREFIX = "/.reserved/.inodes/"; + static Logger LOG = LoggerFactory.getLogger("shims.HdfsUtils"); public static Path getFileIdPath( FileSystem fileSystem, Path path, long fileId) { return (fileSystem instanceof DistributedFileSystem) ? 
new Path(HDFS_ID_PATH_PREFIX + fileId) : path; } + + public static void setFullFileStatus(Configuration conf, HdfsUtils.HadoopFileStatus sourceStatus, + FileSystem fs, Path target) throws IOException { + FileStatus fStatus= sourceStatus.getFileStatus(); + String group = fStatus.getGroup(); + LOG.trace(sourceStatus.getFileStatus().toString()); + //use FsShell to change group, permissions, and extended ACL's recursively + FsShell fsShell = new FsShell(); + fsShell.setConf(conf); + + try { + //If there is no group of a file, no need to call chgrp + if (group != null && !group.isEmpty()) { + run(fsShell, new String[]{"-chgrp", "-R", group, target.toString()}); + } + + if (Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true")) { + //Attempt extended Acl operations only if it's enabled, but don't fail the operation regardless. + try { + AclStatus aclStatus = sourceStatus.getAclStatus(); + if (aclStatus != null) { + LOG.trace(aclStatus.toString()); + List<AclEntry> aclEntries = aclStatus.getEntries(); + removeBaseAclEntries(aclEntries); + + //the ACL api's also expect the traditional user/group/other permission in the form of ACL + FsPermission sourcePerm = fStatus.getPermission(); + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.USER, sourcePerm.getUserAction())); + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.GROUP, sourcePerm.getGroupAction())); + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.OTHER, sourcePerm.getOtherAction())); + + //construct the -setfacl command + String aclEntry = Joiner.on(",").join(aclStatus.getEntries()); + run(fsShell, new String[]{"-setfacl", "-R", "--set", aclEntry, target.toString()}); + } + } catch (Exception e) { + LOG.info("Skipping ACL inheritance: File system for path " + target + " " + + "does not support ACLs but dfs.namenode.acls.enabled is set to true. 
"); + LOG.debug("The details are: " + e, e); + } + } else { + String permission = Integer.toString(fStatus.getPermission().toShort(), 8); + run(fsShell, new String[]{"-chmod", "-R", permission, target.toString()}); + } + } catch (Exception e) { + throw new IOException("Unable to set permissions of " + target, e); + } + } + + /** + * Create a new AclEntry with scope, type and permission (no name). + * + * @param scope + * AclEntryScope scope of the ACL entry + * @param type + * AclEntryType ACL entry type + * @param permission + * FsAction set of permissions in the ACL entry + * @return AclEntry new AclEntry + */ + private static AclEntry newAclEntry(AclEntryScope scope, AclEntryType type, + FsAction permission) { + return new AclEntry.Builder().setScope(scope).setType(type) + .setPermission(permission).build(); + } + /** + * Removes basic permission acls (unnamed acls) from the list of acl entries + * @param entries acl entries to remove from. + */ + private static void removeBaseAclEntries(List<AclEntry> entries) { + Iterables.removeIf(entries, new Predicate<AclEntry>() { + @Override + public boolean apply(AclEntry input) { + if (input.getName() == null) { + return true; + } + return false; + } + }); + } + + private static void run(FsShell shell, String[] command) throws Exception { + LOG.debug(ArrayUtils.toString(command)); + int retval = shell.run(command); + LOG.debug("Return value is :" + retval); + } +public static class HadoopFileStatus { + + private final FileStatus fileStatus; + private final AclStatus aclStatus; + + public HadoopFileStatus(Configuration conf, FileSystem fs, Path file) throws IOException { + + FileStatus fileStatus = fs.getFileStatus(file); + AclStatus aclStatus = null; + if (Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true")) { + //Attempt extended Acl operations only if its enabled, but don't fail the operation regardless. 
+ try { + aclStatus = fs.getAclStatus(file); + } catch (Exception e) { + LOG.info("Skipping ACL inheritance: File system for path " + file + " " + + "does not support ACLs but dfs.namenode.acls.enabled is set to true. "); + LOG.debug("The details are: " + e, e); + } + }this.fileStatus = fileStatus; + this.aclStatus = aclStatus; + } + + public FileStatus getFileStatus() { + return fileStatus; + } + public AclStatus getAclStatus() { + return aclStatus; + } +} } http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java ---------------------------------------------------------------------- diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 4b9119b..37eb8f6 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -179,18 +179,6 @@ public interface HadoopShims { public String getJobLauncherHttpAddress(Configuration conf); /** - * Move the directory/file to trash. In case of the symlinks or mount points, the file is - * moved to the trashbin in the actual volume of the path p being deleted - * @param fs - * @param path - * @param conf - * @return false if the item is already in the trash or trash is disabled - * @throws IOException - */ - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException; - - /** * Get the default block size for the path. FileSystem alone is not sufficient to * determine the same, as in case of CSMT the underlying file system determines that. 
* @param fs @@ -292,35 +280,6 @@ public interface HadoopShims { */ public void hflush(FSDataOutputStream stream) throws IOException; - /** - * For a given file, return a file status - * @param conf - * @param fs - * @param file - * @return - * @throws IOException - */ - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, Path file) throws IOException; - - /** - * For a given file, set a given file status. - * @param conf - * @param sourceStatus - * @param fs - * @param target - * @throws IOException - */ - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException; - - /** - * Includes the vanilla FileStatus, and AclStatus if it applies to this version of hadoop. - */ - public interface HdfsFileStatus { - public FileStatus getFileStatus(); - public void debugLog(); - } - public interface HdfsFileStatusWithId { public FileStatus getFileStatus(); public Long getFileId(); http://git-wip-us.apache.org/repos/asf/hive/blob/e1b03837/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java ---------------------------------------------------------------------- diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 63d48a5..87682e6 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -373,21 +373,11 @@ public abstract class HadoopShimsSecure implements HadoopShims { abstract public long getDefaultBlockSize(FileSystem fs, Path path); @Override - abstract public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException; - - @Override abstract public FileSystem createProxyFileSystem(FileSystem fs, URI uri); @Override abstract public FileSystem getNonCachedFileSystem(URI uri, 
Configuration conf) throws IOException; - protected void run(FsShell shell, String[] command) throws Exception { - LOG.debug(ArrayUtils.toString(command)); - int retval = shell.run(command); - LOG.debug("Return value is :" + retval); - } - private static String[] dedup(String[] locations) throws IOException { Set<String> dedup = new HashSet<String>(); Collections.addAll(dedup, locations);
