This is an automated email from the ASF dual-hosted git repository.

zhangbutao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new a455933d3d5 HIVE-27930: Insert/Load overwrite table partition does not 
clean up directory before overwriting (#4915)(Kiran Velumuri, reviewed by 
Indhumathi Muthumurugesh, Butao Zhang)
a455933d3d5 is described below

commit a455933d3d57fce20b053fa0f4fd023b28bd22de
Author: Kiran Velumuri <kiranvelumuri...@gmail.com>
AuthorDate: Wed Dec 13 19:44:35 2023 +0530

    HIVE-27930: Insert/Load overwrite table partition does not clean up 
directory before overwriting (#4915)(Kiran Velumuri, reviewed by Indhumathi 
Muthumurugesh, Butao Zhang)
---
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |   7 +
 .../insert_and_load_overwrite_drop_partition.q     |  61 +++++
 .../insert_and_load_overwrite_drop_partition.q.out | 256 +++++++++++++++++++++
 3 files changed, 324 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 5022b77fc36..cca06126369 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -5542,6 +5542,13 @@ private void constructOneLBLocationMap(FileStatus fSta,
         throw new HiveException("Getting globStatus " + srcf.toString(), e);
       }
 
+      // For insert/load overwrite cases, where external.table.purge is 
disabled for the table, there may be stale
+      // partitions present in the table location after Alter table drop 
partition operation. In such cases, oldPath will be
+      // null, since those partitions will not be present in metastore. Added 
below check to clean up those stale partitions.
+      if (oldPath == null && isInsertOverwrite) {
+        deleteOldPathForReplace(destf, destf, conf, purge, deletePathFilter, 
isNeedRecycle);
+      }
+
       // the extra check is required to make ALTER TABLE ... CONCATENATE work
       if (oldPath != null && (srcs != null || isInsertOverwrite)) {
         deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, 
isNeedRecycle);
diff --git 
a/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q 
b/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q
new file mode 100644
index 00000000000..fabf4902280
--- /dev/null
+++ 
b/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q
@@ -0,0 +1,61 @@
+CREATE EXTERNAL TABLE `table1`(
+  `name` string,
+  `number` string)
+PARTITIONED BY (
+  `part_col` string);
+
+CREATE EXTERNAL TABLE `table2`(
+  `name` string,
+  `number` string)
+PARTITIONED BY (
+  `part_col` string);
+
+insert into table table1 values ('a', '10', 'part1');
+insert into table table1 values ('b', '11', 'part1');
+insert into table table1 values ('a2', '2', 'part2');
+
+insert into table table2 values ('x', '100', 'part1');
+insert into table table2 values ('y', '101', 'part1');
+insert into table table2 values ('z', '102', 'part1');
+insert into table table2 values ('x2', '200', 'part2');
+insert into table table2 values ('y2', '201', 'part2');
+insert into table table2 values ('x3', '300', 'part3');
+
+--non empty input case
+alter table table2 drop partition(part_col='part1');
+
+select count(*) from table2 where part_col='part1';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part1;
+
+insert overwrite table table2 partition(part_col='part1') select name, number 
from table1 where part_col='part1';
+
+select count(*) from table2 where part_col='part1';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part1;
+
+--empty input case
+alter table table2 drop partition(part_col='part2');
+
+select count(*) from table2 where part_col='part2';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part2;
+
+insert overwrite table table2 partition(part_col='part2') select name, number 
from table1 where part_col='dummy_part';
+
+select count(*) from table2 where part_col='part2';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part2;
+
+--load overwrite partition
+alter table table2 drop partition(part_col='part3');
+
+select count(*) from table2 where part_col='part3';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part3;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE INTO TABLE table2 
PARTITION(part_col='part3');
+
+select count(*) from table2 where part_col='part3';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part3;
\ No newline at end of file
diff --git 
a/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out
 
b/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out
new file mode 100644
index 00000000000..c16cb464568
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out
@@ -0,0 +1,256 @@
+PREHOOK: query: CREATE EXTERNAL TABLE `table1`(
+  `name` string,
+  `number` string)
+PARTITIONED BY (
+  `part_col` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: CREATE EXTERNAL TABLE `table1`(
+  `name` string,
+  `number` string)
+PARTITIONED BY (
+  `part_col` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: CREATE EXTERNAL TABLE `table2`(
+  `name` string,
+  `number` string)
+PARTITIONED BY (
+  `part_col` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table2
+POSTHOOK: query: CREATE EXTERNAL TABLE `table2`(
+  `name` string,
+  `number` string)
+PARTITIONED BY (
+  `part_col` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table2
+PREHOOK: query: insert into table table1 values ('a', '10', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values ('a', '10', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table1
+POSTHOOK: Output: default@table1@part_col=part1
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table1 values ('b', '11', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values ('b', '11', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table1
+POSTHOOK: Output: default@table1@part_col=part1
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table1 values ('a2', '2', 'part2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values ('a2', '2', 'part2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table1
+POSTHOOK: Output: default@table1@part_col=part2
+POSTHOOK: Lineage: table1 PARTITION(part_col=part2).name SCRIPT []
+POSTHOOK: Lineage: table1 PARTITION(part_col=part2).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('x', '100', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('x', '100', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('y', '101', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('y', '101', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('z', '102', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('z', '102', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('x2', '200', 'part2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('x2', '200', 'part2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part2
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('y2', '201', 'part2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('y2', '201', 'part2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part2
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('x3', '300', 'part3')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('x3', '300', 'part3')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part3
+POSTHOOK: Lineage: table2 PARTITION(part_col=part3).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part3).number SCRIPT []
+PREHOOK: query: alter table table2 drop partition(part_col='part1')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2@part_col=part1
+POSTHOOK: query: alter table table2 drop partition(part_col='part1')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+PREHOOK: query: select count(*) from table2 where part_col='part1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+0
+Found 3 items
+#### A masked pattern was here ####
+PREHOOK: query: insert overwrite table table2 partition(part_col='part1') 
select name, number from table1 where part_col='part1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1@part_col=part1
+PREHOOK: Output: default@table2@part_col=part1
+POSTHOOK: query: insert overwrite table table2 partition(part_col='part1') 
select name, number from table1 where part_col='part1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1@part_col=part1
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SIMPLE 
[(table1)table1.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SIMPLE 
[(table1)table1.FieldSchema(name:number, type:string, comment:null), ]
+PREHOOK: query: select count(*) from table2 where part_col='part1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@table2@part_col=part1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@table2@part_col=part1
+#### A masked pattern was here ####
+2
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: alter table table2 drop partition(part_col='part2')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2@part_col=part2
+POSTHOOK: query: alter table table2 drop partition(part_col='part2')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2@part_col=part2
+PREHOOK: query: select count(*) from table2 where part_col='part2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+0
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: insert overwrite table table2 partition(part_col='part2') 
select name, number from table1 where part_col='dummy_part'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table2@part_col=part2
+POSTHOOK: query: insert overwrite table table2 partition(part_col='part2') 
select name, number from table1 where part_col='dummy_part'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table2@part_col=part2
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SIMPLE 
[(table1)table1.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SIMPLE 
[(table1)table1.FieldSchema(name:number, type:string, comment:null), ]
+PREHOOK: query: select count(*) from table2 where part_col='part2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@table2@part_col=part2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@table2@part_col=part2
+#### A masked pattern was here ####
+0
+PREHOOK: query: alter table table2 drop partition(part_col='part3')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2@part_col=part3
+POSTHOOK: query: alter table table2 drop partition(part_col='part3')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2@part_col=part3
+PREHOOK: query: select count(*) from table2 where part_col='part3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+0
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE 
INTO TABLE table2 PARTITION(part_col='part3')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@table2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE 
INTO TABLE table2 PARTITION(part_col='part3')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part3
+PREHOOK: query: select count(*) from table2 where part_col='part3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@table2@part_col=part3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@table2@part_col=part3
+#### A masked pattern was here ####
+24
+Found 1 items
+#### A masked pattern was here ####

Reply via email to