Repository: hive Updated Branches: refs/heads/branch-2.0 f40c1502d -> 67d095d89
HIVE-12505:Insert overwrite in same encrypted zone silently fails to remove some existing files (Chaoyu Tang, reviewed by Aihua Xu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f9791beb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f9791beb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f9791beb Branch: refs/heads/branch-2.0 Commit: f9791bebaa9d71ff62cecedb0243d09e99abfb38 Parents: f40c150 Author: ctang <ctang...@gmail.com> Authored: Fri Dec 4 10:46:23 2015 -0500 Committer: ctang <ctang...@gmail.com> Committed: Sat Dec 5 19:50:25 2015 -0500 ---------------------------------------------------------------------- .../apache/hadoop/hive/common/FileUtils.java | 66 ++++++++-- .../test/resources/testconfiguration.properties | 3 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 26 +++- .../clientpositive/encryption_with_trash.q | 33 +++++ .../encrypted/encryption_with_trash.q.out | 122 +++++++++++++++++++ 5 files changed, 234 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/common/src/java/org/apache/hadoop/hive/common/FileUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index f943781..5dd9f40 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -574,7 +574,7 @@ public final class FileUtils { } /** - * Deletes all files under a directory, sending them to the trash. Leaves the directory as is. + * Trashes or deletes all files under a directory. Leaves the directory as is. * @param fs FileSystem to use * @param f path of directory * @param conf hive configuration @@ -582,17 +582,34 @@ public final class FileUtils { * @throws FileNotFoundException * @throws IOException */ - public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) throws FileNotFoundException, IOException { + public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) + throws FileNotFoundException, IOException { + return trashFilesUnderDir(fs, f, conf, true); + } + + /** + * Trashes or deletes all files under a directory. Leaves the directory as is. + * @param fs FileSystem to use + * @param f path of directory + * @param conf hive configuration + * @param forceDelete whether to force delete files if trashing does not succeed + * @return true if deletion successful + * @throws FileNotFoundException + * @throws IOException + */ + public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf, + boolean forceDelete) throws FileNotFoundException, IOException { FileStatus[] statuses = fs.listStatus(f, HIDDEN_FILES_PATH_FILTER); boolean result = true; for (FileStatus status : statuses) { - result = result & moveToTrash(fs, status.getPath(), conf); + result = result & moveToTrash(fs, status.getPath(), conf, forceDelete); } return result; } /** - * Move a particular file or directory to the trash. + * Move a particular file or directory to the trash. If for a certain reason the trashing fails + * it will force deletes the file or directory * @param fs FileSystem to use * @param f path of file or directory to move to trash. * @param conf @@ -600,18 +617,47 @@ public final class FileUtils { * @throws IOException */ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf) throws IOException { + return moveToTrash(fs, f, conf, true); + } + + /** + * Move a particular file or directory to the trash. + * @param fs FileSystem to use + * @param f path of file or directory to move to trash. + * @param conf + * @param forceDelete whether force delete the file or directory if trashing fails + * @return true if move successful + * @throws IOException + */ + public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete) + throws IOException { LOG.info("deleting " + f); HadoopShims hadoopShim = ShimLoader.getHadoopShims(); - if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) { - LOG.info("Moved to trash: " + f); - return true; + boolean result = false; + try { + result = hadoopShim.moveToAppropriateTrash(fs, f, conf); + if (result) { + LOG.info("Moved to trash: " + f); + return true; + } + } catch (IOException ioe) { + if (forceDelete) { + // for whatever failure reason including that trash has lower encryption zone + // retry with force delete + LOG.warn(ioe.getMessage() + "; Force to delete it."); + } else { + throw ioe; + } } - boolean result = fs.delete(f, true); - if (!result) { - LOG.error("Failed to delete " + f); + if (forceDelete) { + result = fs.delete(f, true); + if (!result) { + LOG.error("Failed to delete " + f); + } } + return result; } http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 523f894..f2b4d80 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -474,7 +474,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_drop_table.q \ encryption_insert_values.q \ encryption_drop_view.q \ - encryption_drop_partition.q + encryption_drop_partition.q \ + encryption_with_trash.q beeline.positive.exclude=add_part_exist.q,\ alter1.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 488d923..a4ada24 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2953,19 +2953,35 @@ private void constructOneLBLocationMap(FileStatus fSta, List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true); if (oldPath != null) { + boolean oldPathDeleted = false; + boolean isOldPathUnderDestf = false; try { FileSystem fs2 = oldPath.getFileSystem(conf); if (fs2.exists(oldPath)) { // Do not delete oldPath if: // - destf is subdir of oldPath //if ( !(fs2.equals(destf.getFileSystem(conf)) && FileUtils.isSubDir(oldPath, destf, fs2))) - if (FileUtils.isSubDir(oldPath, destf, fs2)) { - FileUtils.trashFilesUnderDir(fs2, oldPath, conf); + isOldPathUnderDestf = FileUtils.isSubDir(oldPath, destf, fs2); + if (isOldPathUnderDestf) { + // if oldPath is destf or its subdir, its should definitely be deleted, otherwise its + // existing content might result in incorrect (extra) data. + // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is + // not the destf or its subdir? + oldPathDeleted = FileUtils.trashFilesUnderDir(fs2, oldPath, conf); } } - } catch (Exception e) { - //swallow the exception - LOG.warn("Directory " + oldPath.toString() + " cannot be removed: " + e, e); + } catch (IOException e) { + if (isOldPathUnderDestf) { + // if oldPath is a subdir of destf but it could not be cleaned + throw new HiveException("Directory " + oldPath.toString() + + " could not be cleaned up.", e); + } else { + //swallow the exception since it won't affect the final result + LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e); + } + } + if (isOldPathUnderDestf && !oldPathDeleted) { + throw new HiveException("Destination directory " + destf + " has not be cleaned up."); } } http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/test/queries/clientpositive/encryption_with_trash.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/encryption_with_trash.q b/ql/src/test/queries/clientpositive/encryption_with_trash.q new file mode 100644 index 0000000..8f8789a --- /dev/null +++ b/ql/src/test/queries/clientpositive/encryption_with_trash.q @@ -0,0 +1,33 @@ +set fs.trash.interval=5 + +-- SORT_QUERY_RESULTS + +-- init +drop table IF EXISTS encryptedTableSrc PURGE; +drop table IF EXISTS unencryptedTable PURGE; + +create table encryptedTableSrc(key string, value string) +LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc'; + +create table encryptedTable(key string, value string) partitioned by (ds string) + LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTable'; +CRYPTO CREATE_KEY --keyName key_1 --bitLength 128; +CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc; +CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTable; + +-- insert src table from values +insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502'); + +insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc; +select count(*) from encryptedTable where ds='today'; +insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc; +select count(*) from encryptedTable where ds='today'; + +insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc; +select count(*) from encryptedTable where ds='today'; + +-- clean up +drop table encryptedTable PURGE; +drop table unencryptedTable PURGE; +CRYPTO DELETE_KEY --keyName key_1; +set fs.trash.interval=0 http://git-wip-us.apache.org/repos/asf/hive/blob/f9791beb/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out new file mode 100644 index 0000000..3d1f75f --- /dev/null +++ b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out @@ -0,0 +1,122 @@ +Warning: Value had a \n character in it. +PREHOOK: query: drop table IF EXISTS unencryptedTable PURGE +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table IF EXISTS unencryptedTable PURGE +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table encryptedTableSrc(key string, value string) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@encryptedTableSrc +POSTHOOK: query: create table encryptedTableSrc(key string, value string) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encryptedTableSrc +PREHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@encryptedTable +POSTHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encryptedTable +Encryption key created: 'key_1' +Encryption zone created: '/build/ql/test/data/warehouse/encryptedTableSrc' using key: 'key_1' +Encryption zone created: '/build/ql/test/data/warehouse/encryptedTable' using key: 'key_1' +PREHOOK: query: -- insert src table from values +insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@encryptedtablesrc +POSTHOOK: query: -- insert src table from values +insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@encryptedtablesrc +POSTHOOK: Lineage: encryptedtablesrc.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: encryptedtablesrc.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc +PREHOOK: type: QUERY +PREHOOK: Input: default@encryptedtablesrc +PREHOOK: Output: default@encryptedtable@ds=today +POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encryptedtablesrc +POSTHOOK: Output: default@encryptedtable@ds=today +POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from encryptedTable where ds='today' +PREHOOK: type: QUERY +PREHOOK: Input: default@encryptedtable +PREHOOK: Input: default@encryptedtable@ds=today +#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging +POSTHOOK: query: select count(*) from encryptedTable where ds='today' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encryptedtable +POSTHOOK: Input: default@encryptedtable@ds=today +#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging +2 +PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc +PREHOOK: type: QUERY +PREHOOK: Input: default@encryptedtablesrc +PREHOOK: Output: default@encryptedtable@ds=today +POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encryptedtablesrc +POSTHOOK: Output: default@encryptedtable@ds=today +POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from encryptedTable where ds='today' +PREHOOK: type: QUERY +PREHOOK: Input: default@encryptedtable +PREHOOK: Input: default@encryptedtable@ds=today +#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging +POSTHOOK: query: select count(*) from encryptedTable where ds='today' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encryptedtable +POSTHOOK: Input: default@encryptedtable@ds=today +#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging +4 +PREHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc +PREHOOK: type: QUERY +PREHOOK: Input: default@encryptedtablesrc +PREHOOK: Output: default@encryptedtable@ds=today +POSTHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encryptedtablesrc +POSTHOOK: Output: default@encryptedtable@ds=today +POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from encryptedTable where ds='today' +PREHOOK: type: QUERY +PREHOOK: Input: default@encryptedtable +PREHOOK: Input: default@encryptedtable@ds=today +#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging +POSTHOOK: query: select count(*) from encryptedTable where ds='today' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encryptedtable +POSTHOOK: Input: default@encryptedtable@ds=today +#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging +2 +PREHOOK: query: -- clean up +drop table encryptedTable PURGE +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@encryptedtable +PREHOOK: Output: default@encryptedtable +POSTHOOK: query: -- clean up +drop table encryptedTable PURGE +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@encryptedtable +POSTHOOK: Output: default@encryptedtable +PREHOOK: query: drop table unencryptedTable PURGE +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table unencryptedTable PURGE +POSTHOOK: type: DROPTABLE +Encryption key deleted: 'key_1'