This is an automated email from the ASF dual-hosted git repository. zhangbutao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new a455933d3d5 HIVE-27930: Insert/Load overwrite table partition does not clean up directory before overwriting (#4915)(Kiran Velumuri, reviewed by Indhumathi Muthumurugesh, Butao Zhang) a455933d3d5 is described below commit a455933d3d57fce20b053fa0f4fd023b28bd22de Author: Kiran Velumuri <kiranvelumuri...@gmail.com> AuthorDate: Wed Dec 13 19:44:35 2023 +0530 HIVE-27930: Insert/Load overwrite table partition does not clean up directory before overwriting (#4915)(Kiran Velumuri, reviewed by Indhumathi Muthumurugesh, Butao Zhang) --- .../org/apache/hadoop/hive/ql/metadata/Hive.java | 7 + .../insert_and_load_overwrite_drop_partition.q | 61 +++++ .../insert_and_load_overwrite_drop_partition.q.out | 256 +++++++++++++++++++++ 3 files changed, 324 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 5022b77fc36..cca06126369 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -5542,6 +5542,13 @@ private void constructOneLBLocationMap(FileStatus fSta, throw new HiveException("Getting globStatus " + srcf.toString(), e); } + // For insert/load overwrite cases, where external.table.purge is disabled for the table, there may be stale + // partitions present in the table location after Alter table drop partition operation. In such cases, oldPath will be + // null, since those partitions will not be present in metastore. Added below check to clean up those stale partitions. + if (oldPath == null && isInsertOverwrite) { + deleteOldPathForReplace(destf, destf, conf, purge, deletePathFilter, isNeedRecycle); + } + // the extra check is required to make ALTER TABLE ... CONCATENATE work if (oldPath != null && (srcs != null || isInsertOverwrite)) { deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isNeedRecycle); diff --git a/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q b/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q new file mode 100644 index 00000000000..fabf4902280 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q @@ -0,0 +1,61 @@ +CREATE EXTERNAL TABLE `table1`( + `name` string, + `number` string) +PARTITIONED BY ( + `part_col` string); + +CREATE EXTERNAL TABLE `table2`( + `name` string, + `number` string) +PARTITIONED BY ( + `part_col` string); + +insert into table table1 values ('a', '10', 'part1'); +insert into table table1 values ('b', '11', 'part1'); +insert into table table1 values ('a2', '2', 'part2'); + +insert into table table2 values ('x', '100', 'part1'); +insert into table table2 values ('y', '101', 'part1'); +insert into table table2 values ('z', '102', 'part1'); +insert into table table2 values ('x2', '200', 'part2'); +insert into table table2 values ('y2', '201', 'part2'); +insert into table table2 values ('x3', '300', 'part3'); + +--non empty input case +alter table table2 drop partition(part_col='part1'); + +select count(*) from table2 where part_col='part1'; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part1; + +insert overwrite table table2 partition(part_col='part1') select name, number from table1 where part_col='part1'; + +select count(*) from table2 where part_col='part1'; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part1; + +--empty input case +alter table table2 drop partition(part_col='part2'); + +select count(*) from table2 where part_col='part2'; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part2; + +insert overwrite table table2 partition(part_col='part2') select name, number from table1 where part_col='dummy_part'; + +select count(*) from table2 where part_col='part2'; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part2; + +--load overwrite partition +alter table table2 drop partition(part_col='part3'); + +select count(*) from table2 where part_col='part3'; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part3; + +LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE INTO TABLE table2 PARTITION(part_col='part3'); + +select count(*) from table2 where part_col='part3'; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part3; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out b/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out new file mode 100644 index 00000000000..c16cb464568 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out @@ -0,0 +1,256 @@ +PREHOOK: query: CREATE EXTERNAL TABLE `table1`( + `name` string, + `number` string) +PARTITIONED BY ( + `part_col` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: CREATE EXTERNAL TABLE `table1`( + `name` string, + `number` string) +PARTITIONED BY ( + `part_col` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: CREATE EXTERNAL TABLE `table2`( + `name` string, + `number` string) +PARTITIONED BY ( + `part_col` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: CREATE EXTERNAL TABLE `table2`( + `name` string, + `number` string) +PARTITIONED BY ( + `part_col` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table1 values ('a', '10', 'part1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values ('a', '10', 'part1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Output: default@table1@part_col=part1 +POSTHOOK: Lineage: table1 PARTITION(part_col=part1).name SCRIPT [] +POSTHOOK: Lineage: table1 PARTITION(part_col=part1).number SCRIPT [] +PREHOOK: query: insert into table table1 values ('b', '11', 'part1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values ('b', '11', 'part1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Output: default@table1@part_col=part1 +POSTHOOK: Lineage: table1 PARTITION(part_col=part1).name SCRIPT [] +POSTHOOK: Lineage: table1 PARTITION(part_col=part1).number SCRIPT [] +PREHOOK: query: insert into table table1 values ('a2', '2', 'part2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values ('a2', '2', 'part2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1 +POSTHOOK: Output: default@table1@part_col=part2 +POSTHOOK: Lineage: table1 PARTITION(part_col=part2).name SCRIPT [] +POSTHOOK: Lineage: table1 PARTITION(part_col=part2).number SCRIPT [] +PREHOOK: query: insert into table table2 values ('x', '100', 'part1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values ('x', '100', 'part1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part1 +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT [] +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT [] +PREHOOK: query: insert into table table2 values ('y', '101', 'part1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values ('y', '101', 'part1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part1 +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT [] +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT [] +PREHOOK: query: insert into table table2 values ('z', '102', 'part1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values ('z', '102', 'part1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part1 +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT [] +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT [] +PREHOOK: query: insert into table table2 values ('x2', '200', 'part2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values ('x2', '200', 'part2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part2 +POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SCRIPT [] +POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SCRIPT [] +PREHOOK: query: insert into table table2 values ('y2', '201', 'part2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values ('y2', '201', 'part2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part2 +POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SCRIPT [] +POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SCRIPT [] +PREHOOK: query: insert into table table2 values ('x3', '300', 'part3') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values ('x3', '300', 'part3') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part3 +POSTHOOK: Lineage: table2 PARTITION(part_col=part3).name SCRIPT [] +POSTHOOK: Lineage: table2 PARTITION(part_col=part3).number SCRIPT [] +PREHOOK: query: alter table table2 drop partition(part_col='part1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2@part_col=part1 +POSTHOOK: query: alter table table2 drop partition(part_col='part1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2@part_col=part1 +PREHOOK: query: select count(*) from table2 where part_col='part1' +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from table2 where part_col='part1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +0 +Found 3 items +#### A masked pattern was here #### +PREHOOK: query: insert overwrite table table2 partition(part_col='part1') select name, number from table1 where part_col='part1' +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +PREHOOK: Input: default@table1@part_col=part1 +PREHOOK: Output: default@table2@part_col=part1 +POSTHOOK: query: insert overwrite table table2 partition(part_col='part1') select name, number from table1 where part_col='part1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +POSTHOOK: Input: default@table1@part_col=part1 +POSTHOOK: Output: default@table2@part_col=part1 +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SIMPLE [(table1)table1.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SIMPLE [(table1)table1.FieldSchema(name:number, type:string, comment:null), ] +PREHOOK: query: select count(*) from table2 where part_col='part1' +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +PREHOOK: Input: default@table2@part_col=part1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from table2 where part_col='part1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +POSTHOOK: Input: default@table2@part_col=part1 +#### A masked pattern was here #### +2 +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: alter table table2 drop partition(part_col='part2') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2@part_col=part2 +POSTHOOK: query: alter table table2 drop partition(part_col='part2') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2@part_col=part2 +PREHOOK: query: select count(*) from table2 where part_col='part2' +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from table2 where part_col='part2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +0 +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: insert overwrite table table2 partition(part_col='part2') select name, number from table1 where part_col='dummy_part' +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table2@part_col=part2 +POSTHOOK: query: insert overwrite table table2 partition(part_col='part2') select name, number from table1 where part_col='dummy_part' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table2@part_col=part2 +POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SIMPLE [(table1)table1.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SIMPLE [(table1)table1.FieldSchema(name:number, type:string, comment:null), ] +PREHOOK: query: select count(*) from table2 where part_col='part2' +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +PREHOOK: Input: default@table2@part_col=part2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from table2 where part_col='part2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +POSTHOOK: Input: default@table2@part_col=part2 +#### A masked pattern was here #### +0 +PREHOOK: query: alter table table2 drop partition(part_col='part3') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2@part_col=part3 +POSTHOOK: query: alter table table2 drop partition(part_col='part3') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2@part_col=part3 +PREHOOK: query: select count(*) from table2 where part_col='part3' +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from table2 where part_col='part3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +0 +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE INTO TABLE table2 PARTITION(part_col='part3') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@table2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE INTO TABLE table2 PARTITION(part_col='part3') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@table2 +POSTHOOK: Output: default@table2@part_col=part3 +PREHOOK: query: select count(*) from table2 where part_col='part3' +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +PREHOOK: Input: default@table2@part_col=part3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from table2 where part_col='part3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +POSTHOOK: Input: default@table2@part_col=part3 +#### A masked pattern was here #### +24 +Found 1 items +#### A masked pattern was here ####