Author: hashutosh
Date: Mon Jul 14 17:01:56 2014
New Revision: 1610458
URL: http://svn.apache.org/r1610458
Log:
HIVE-7213 : COUNT(*) returns out-dated count value after TRUNCATE (Ashutosh
Chauhan via Navis)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hive/trunk/ql/src/test/queries/clientpositive/truncate_table.q
hive/trunk/ql/src/test/results/clientpositive/truncate_table.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1610458&r1=1610457&r2=1610458&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Mon Jul
14 17:01:56 2014
@@ -53,6 +53,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -4567,24 +4568,49 @@ public class DDLTask extends Task<DDLWor
}
private List<Path> getLocations(Hive db, Table table, Map<String, String>
partSpec)
- throws HiveException {
+ throws HiveException, InvalidOperationException { // collects the data location of the table or of each selected partition; the added exception presumably comes from the new alterPartition/alterTable calls below (confirm)
List<Path> locations = new ArrayList<Path>();
if (partSpec == null) {
if (table.isPartitioned()) {
for (Partition partition : db.getPartitions(table)) {
locations.add(partition.getDataLocation());
+ if (needToUpdateStats(partition.getParameters())) { // helper edits the parameter map in place and returns true when some supported stat was > 0
+ db.alterPartition(table.getDbName(), table.getTableName(),
partition);
+ }
}
} else {
locations.add(table.getPath());
+ if (needToUpdateStats(table.getParameters())) { // unpartitioned table: same check against the table-level parameters
+ db.alterTable(table.getDbName()+"."+table.getTableName(), table);
+ }
}
} else {
for (Partition partition : db.getPartitionsByNames(table, partSpec)) {
locations.add(partition.getDataLocation());
+ if (needToUpdateStats(partition.getParameters())) { // only the partitions matching partSpec are checked and altered
+ db.alterPartition(table.getDbName(), table.getTableName(),
partition);
+ }
}
}
return locations;
}
+ /**
+ * Resets the basic statistics kept in a table's or partition's parameter map.
+ *
+ * Every entry named in StatsSetupConst.supportedStats whose value parses to a
+ * number greater than zero is overwritten with "0", and COLUMN_STATS_ACCURATE
+ * is set to "false". The map is modified in place; the caller is expected to
+ * pass the owning table/partition to an alter call when this returns true.
+ *
+ * @param props parameter map of a table or partition; may be null
+ * @return true if at least one positive stat was found and reset, false if
+ * props is null or holds no positive supported stat
+ * @throws NumberFormatException if a supported stat holds a non-numeric value
+ */
+ private boolean needToUpdateStats(Map<String,String> props) {
+ if (null == props) {
+ return false;
+ }
+ boolean statsPresent = false;
+ for (String stat : StatsSetupConst.supportedStats) {
+ String statVal = props.get(stat);
+ if (statVal != null && Long.parseLong(statVal) > 0) {
+ statsPresent = true;
+ // Key must be the stat NAME. Using the old value as the key left the
+ // stale count in place and added a junk parameter such as "500" -> "0".
+ props.put(stat, "0");
+ props.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "false");
+ }
+ }
+ return statsPresent;
+ }
+
private String escapeHiveCommand(String str) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < str.length(); i ++) {
Modified: hive/trunk/ql/src/test/queries/clientpositive/truncate_table.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/truncate_table.q?rev=1610458&r1=1610457&r2=1610458&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/truncate_table.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/truncate_table.q Mon Jul 14
17:01:56 2014
@@ -12,24 +12,31 @@ load data local inpath '../../data/files
load data local inpath '../../data/files/kv1.txt' into table srcpart_truncate
partition (ds='2008-04-09', hr='11');
load data local inpath '../../data/files/kv1.txt' into table srcpart_truncate
partition (ds='2008-04-09', hr='12');
+analyze table src_truncate compute statistics;
+analyze table srcpart_truncate partition(ds,hr) compute statistics;
set hive.fetch.task.conversion=more;
+set hive.compute.query.using.stats=true;
-- truncate non-partitioned table
explain TRUNCATE TABLE src_truncate;
TRUNCATE TABLE src_truncate;
select * from src_truncate;
+select count (*) from src_truncate;
-- truncate a partition
explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11');
TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11');
select * from srcpart_truncate where ds='2008-04-08' and hr='11';
+select count(*) from srcpart_truncate where ds='2008-04-08' and hr='11';
-- truncate partitions with partial spec
explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12');
TRUNCATE TABLE srcpart_truncate partition (ds, hr='12');
select * from srcpart_truncate where hr='12';
+select count(*) from srcpart_truncate where hr='12';
-- truncate partitioned table
explain TRUNCATE TABLE srcpart_truncate;
TRUNCATE TABLE srcpart_truncate;
select * from srcpart_truncate;
+select count(*) from srcpart_truncate;
Modified: hive/trunk/ql/src/test/results/clientpositive/truncate_table.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/truncate_table.q.out?rev=1610458&r1=1610457&r2=1610458&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/truncate_table.q.out
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/truncate_table.q.out Mon Jul
14 17:01:56 2014
@@ -80,6 +80,38 @@ POSTHOOK: query: load data local inpath
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=12
+PREHOOK: query: analyze table src_truncate compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_truncate
+PREHOOK: Output: default@src_truncate
+POSTHOOK: query: analyze table src_truncate compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_truncate
+POSTHOOK: Output: default@src_truncate
+PREHOOK: query: analyze table srcpart_truncate partition(ds,hr) compute
statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_truncate
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_truncate
+PREHOOK: Output: default@srcpart_truncate@ds=2008-04-08/hr=11
+PREHOOK: Output: default@srcpart_truncate@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=11
+PREHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=12
+POSTHOOK: query: analyze table srcpart_truncate partition(ds,hr) compute
statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_truncate
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_truncate
+POSTHOOK: Output: default@srcpart_truncate@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@srcpart_truncate@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=12
PREHOOK: query: -- truncate non-partitioned table
explain TRUNCATE TABLE src_truncate
PREHOOK: type: TRUNCATETABLE
@@ -109,6 +141,15 @@ POSTHOOK: query: select * from src_trunc
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_truncate
#### A masked pattern was here ####
+PREHOOK: query: select count (*) from src_truncate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_truncate
+#### A masked pattern was here ####
+POSTHOOK: query: select count (*) from src_truncate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_truncate
+#### A masked pattern was here ####
+0
PREHOOK: query: -- truncate a partition
explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11')
PREHOOK: type: TRUNCATETABLE
@@ -143,6 +184,17 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart_truncate
POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
#### A masked pattern was here ####
+PREHOOK: query: select count(*) from srcpart_truncate where ds='2008-04-08'
and hr='11'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_truncate
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_truncate where ds='2008-04-08'
and hr='11'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_truncate
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+0
PREHOOK: query: -- truncate partitions with partial spec
explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12')
PREHOOK: type: TRUNCATETABLE
@@ -181,6 +233,19 @@ POSTHOOK: Input: default@srcpart_truncat
POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
#### A masked pattern was here ####
+PREHOOK: query: select count(*) from srcpart_truncate where hr='12'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_truncate
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_truncate where hr='12'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_truncate
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+0
PREHOOK: query: -- truncate partitioned table
explain TRUNCATE TABLE srcpart_truncate
PREHOOK: type: TRUNCATETABLE
@@ -224,3 +289,20 @@ POSTHOOK: Input: default@srcpart_truncat
POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
#### A masked pattern was here ####
+PREHOOK: query: select count(*) from srcpart_truncate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_truncate
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_truncate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_truncate
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+0