This is an automated email from the ASF dual-hosted git repository. aasha pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 8785dd2 HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma) 8785dd2 is described below commit 8785dd23c72e7a3fb61da1bce34ae33ff8c7561d Author: Ayush Saxena <ayushsax...@apache.org> AuthorDate: Mon Aug 9 16:15:51 2021 +0530 HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma) --- .../TestReplicationScenariosExternalTables.java | 45 ++++++++++++++--- .../hadoop/hive/ql/exec/repl/DirCopyTask.java | 59 ++++++++++++++-------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 3e794ef..44fdd00 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -87,6 +87,7 @@ import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLI import static org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.INC_BOOTSTRAP_ROOT_DIR_NAME; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; public class TestReplicationScenariosExternalTables extends BaseReplicationAcrossInstances { @@ -357,6 +358,7 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros "/" + testName.getMethodName() + "/" + primaryDbName + "/" + "a/"); DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem(); fs.mkdirs(externalTableLocation, new FsPermission("777")); + fs.setOwner(externalTableLocation,"user1","group1"); Path externalFileLoc = new Path(externalTableLocation, "file1.txt"); try (FSDataOutputStream outputStream = fs.create(externalFileLoc)) { @@ -382,11 +384,9 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros fs.modifyAclEntries(externalTableLocation, aclEntries); fs.modifyAclEntries(externalFileLoc, aclEntries); - // Run bootstrap with distcp options to preserve ACL. - List<String> withClause = Arrays - .asList("'distcp.options.update'=''", "'distcp.options.puga'=''", - "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname - + "'='true'"); + // Run bootstrap without distcp options to preserve options. + List<String> withClause = Arrays.asList("'distcp.options.update'=''", + "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'"); primary.run("use " + primaryDbName).run( "create external table a (i int, j int) " @@ -400,13 +400,41 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros .verifyResults(new String[] {"1", "13"}).run("select j from a") .verifyResults(new String[] {"2", "21"}); - // Verify the ACL's of the destination table directory and data file are - // same as that of source. + // Verify the attributes of the destination table directory and data file are + // not same as that of source. Hive hiveForReplica = Hive.get(replica.hiveConf); org.apache.hadoop.hive.ql.metadata.Table replicaTable = hiveForReplica.getTable(replicatedDbName + ".a"); Path dataLocation = replicaTable.getDataLocation(); + assertNotEquals("ACL entries are same for the data file.", + fs.getAclStatus(externalFileLoc).getEntries().size(), + fs.getAclStatus(new Path(dataLocation, "file1.txt")).getEntries() + .size()); + assertNotEquals("ACL entries are same for the table directory.", + fs.getAclStatus(externalTableLocation).getEntries().size(), + fs.getAclStatus(dataLocation).getEntries().size()); + + assertNotEquals(fs.getFileStatus(externalTableLocation).getOwner(), fs.getFileStatus(dataLocation).getOwner()); + assertNotEquals(fs.getFileStatus(externalTableLocation).getGroup(), fs.getFileStatus(dataLocation).getGroup()); + + // Dump & load with preserve attributes set. + withClause = Arrays + .asList("'distcp.options.update'=''", "'distcp.options.pugpa'=''", + "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + + "'='true'"); + + primary.run("use " + primaryDbName).dump(primaryDbName, withClause); + + // Verify load is success and has the appropriate data. + replica.load(replicatedDbName, primaryDbName, withClause) + .run("use " + replicatedDbName).run("select i From a") + .verifyResults(new String[] {"1", "13"}).run("select j from a") + .verifyResults(new String[] {"2", "21"}); + + // Verify the ACL's of the destination table directory and data file are + // same as that of source. + assertEquals("ACL entries are not same for the data file.", fs.getAclStatus(externalFileLoc).getEntries().size(), fs.getAclStatus(new Path(dataLocation, "file1.txt")).getEntries() @@ -414,6 +442,9 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros assertEquals("ACL entries are not same for the table directory.", fs.getAclStatus(externalTableLocation).getEntries().size(), fs.getAclStatus(dataLocation).getEntries().size()); + + assertEquals(fs.getFileStatus(externalTableLocation).getOwner(), fs.getFileStatus(dataLocation).getOwner()); + assertEquals(fs.getFileStatus(externalTableLocation).getGroup(), fs.getFileStatus(dataLocation).getGroup()); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java index 2e0154a..0890082 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java @@ -85,39 +85,56 @@ public class DirCopyTask extends Task<DirCopyWork> implements Serializable { } LOG.info("Setting permission for path dest {} from source {} owner {} : {} : {}", destPath, sourcePath, status.getOwner(), status.getGroup(), status.getPermission()); - destPath.getFileSystem(clonedConf).setOwner(destPath, status.getOwner(), status.getGroup()); - destPath.getFileSystem(clonedConf).setPermission(destPath, status.getPermission()); - setAclsToTarget(status, sourcePath, destPath, clonedConf); + preserveDistCpAttributes(destPath, sourcePath, clonedConf, status); return createdDir; } - private void setAclsToTarget(FileStatus sourceStatus, Path sourcePath, Path destPath, HiveConf clonedConf) throws IOException { + private void preserveDistCpAttributes(Path destPath, Path sourcePath, HiveConf clonedConf, FileStatus status) + throws IOException { + String preserveString = getPreserveString(clonedConf); + LOG.info("Preserving DistCp Attributes: {}", preserveString); + FileSystem destFs = destPath.getFileSystem(clonedConf); + // If both preserve user and group are specified. + if (preserveString.contains("u") && preserveString.contains("g")) { + destFs.setOwner(destPath, status.getOwner(), status.getGroup()); + } else if (preserveString.contains("u")) { + // If only preserve user is specified. + destFs.setOwner(destPath, status.getOwner(), null); + } else if (preserveString.contains("g")) { + // If only preserve group is specified. + destFs.setOwner(destPath, null, status.getGroup()); + } + // Preserve permissions + if (preserveString.contains("p")) { + destFs.setPermission(destPath, status.getPermission()); + } + // Preserve ACL + if (preserveString.contains("a")) { + setAclsToTarget(status, sourcePath, destPath, clonedConf); + } + } + + private void setAclsToTarget(FileStatus sourceStatus, Path sourcePath, Path destPath, HiveConf clonedConf) + throws IOException { // Check if distCp options contains preserve ACL. - if (isPreserveAcl(clonedConf)) { - AclStatus sourceAcls = - sourcePath.getFileSystem(clonedConf).getAclStatus(sourcePath); - if (sourceAcls != null && sourceAcls.getEntries().size() > 0) { - destPath.getFileSystem(clonedConf).removeAcl(destPath); - List<AclEntry> effectiveAclEntries = AclUtil - .getAclFromPermAndEntries(sourceStatus.getPermission(), - sourceAcls.getEntries()); - destPath.getFileSystem(clonedConf).setAcl(destPath, effectiveAclEntries); - } + AclStatus sourceAcls = sourcePath.getFileSystem(clonedConf).getAclStatus(sourcePath); + if (sourceAcls != null && sourceAcls.getEntries().size() > 0) { + destPath.getFileSystem(clonedConf).removeAcl(destPath); + List<AclEntry> effectiveAclEntries = + AclUtil.getAclFromPermAndEntries(sourceStatus.getPermission(), sourceAcls.getEntries()); + destPath.getFileSystem(clonedConf).setAcl(destPath, effectiveAclEntries); } } - private boolean isPreserveAcl(HiveConf clonedConf) { + private String getPreserveString(HiveConf clonedConf) { List<String> distCpOptions = constructDistCpOptions(clonedConf); for (String option : distCpOptions) { if (option.startsWith("-p")) { - if (option.contains("a")) { - return true; - } else { - return false; - } + + return option.replaceFirst("-p", ""); } } - return false; + return ""; } private boolean setTargetPathOwner(Path targetPath, Path sourcePath, UserGroupInformation proxyUser,