This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 751fd73db24 HIVE-28551: Stale results when executing queries over recreated tables (#6064)
751fd73db24 is described below
commit 751fd73db24f4b7e0d34dde2ff698bd8421e83f3
Author: Thomas Rebele <[email protected]>
AuthorDate: Fri Sep 12 10:40:03 2025 +0200
    HIVE-28551: Stale results when executing queries over recreated tables (#6064)
Co-authored-by: czxm <[email protected]>
---
.../hive/ql/cache/results/QueryResultsCache.java | 23 +++++---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 6 ++-
.../clientpositive/results_cache_invalidation3.q | 16 ++++++
.../clientpositive/results_cache_invalidation4.q | 16 ++++++
.../llap/results_cache_invalidation3.q.out | 62 ++++++++++++++++++++++
.../llap/results_cache_invalidation4.q.out | 62 ++++++++++++++++++++++
6 files changed, 178 insertions(+), 7 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
index b613df1df44..6713219d2c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
@@ -65,7 +65,6 @@
import org.apache.hadoop.hive.ql.hooks.Entity.Type;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.events.EventConsumer;
@@ -86,13 +85,15 @@ public final class QueryResultsCache {
private static final Logger LOG = LoggerFactory.getLogger(QueryResultsCache.class);
public static class LookupInfo {
- private String queryText;
- private Supplier<ValidTxnWriteIdList> txnWriteIdListProvider;
+ private final String queryText;
+ private final Supplier<ValidTxnWriteIdList> txnWriteIdListProvider;
+ private final Set<Long> tableIds;
- public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider) {
+ public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider, Set<Long> tableIds) {
super();
this.queryText = queryText;
this.txnWriteIdListProvider = txnWriteIdListProvider;
+ this.tableIds = tableIds;
}
public String getQueryText() {
@@ -429,7 +430,7 @@ public CacheEntry lookup(LookupInfo request) {
Set<CacheEntry> entriesToRemove = new HashSet<CacheEntry>();
Lock readLock = rwLock.readLock();
try {
- // Note: ReentrantReadWriteLock deos not allow upgrading a read lock to a write lock.
+ // Note: ReentrantReadWriteLock does not allow upgrading a read lock to a write lock.
// Care must be taken while under read lock, to make sure we do not perform any actions
// which attempt to take a write lock.
readLock.lock();
@@ -671,10 +672,20 @@ public void notifyTableChanged(String dbName, String tableName, long updateTime)
*/
private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, Set<CacheEntry> entriesToRemove) {
QueryInfo queryInfo = entry.getQueryInfo();
+
for (ReadEntity readEntity : queryInfo.getInputs()) {
- // Check that the tables used do not resolve to temp tables.
if (readEntity.getType() == Type.TABLE) {
Table tableUsed = readEntity.getTable();
+ // We want the lookupInfo.tableIds to refer to the same tables as the table ids of the cache entry.
+ // The query text is used as the cache key, so the lookup and the entry resolve the same number of tables;
+ // it is therefore enough to check that every table id of the cache entry is contained in the lookup.
+ long id = tableUsed.getTTable().getId();
+ if (!lookupInfo.tableIds.contains(id)) {
+ LOG.debug("Cache entry contains a table (tableId={}) that is not
present in the query", id);
+ return false;
+ }
+
+ // Check that the tables used do not resolve to temp tables.
Map<String, Table> tempTables = SessionHiveMetaStoreClient.getTempTablesForDatabase(tableUsed.getDbName(), tableUsed.getTableName());
if (tempTables != null && tempTables.containsKey(tableUsed.getTableName())) {
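
For context, the core of the fix is to compare metastore table ids rather than only table names: dropping and recreating a table keeps the name but assigns a new id, so a cache entry recorded against the old id can no longer satisfy a lookup over the new table. Below is a minimal sketch of that matching idea; the type and method names are illustrative only, not the actual Hive classes.

import java.util.Set;

final class TableIdMatchSketch {
  // Returns true only if every table id recorded for the cached entry is still
  // among the ids the current query resolved to. After DROP TABLE + CREATE TABLE
  // the metastore hands out a new id, so the stale entry stops matching and the
  // query falls through to normal execution instead of reusing old results.
  static boolean entryStillValid(Set<Long> cachedEntryTableIds, Set<Long> lookupTableIds) {
    for (long id : cachedEntryTableIds) {
      if (!lookupTableIds.contains(id)) {
        return false;
      }
    }
    return true;
  }
}
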
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 170d0f4e473..1eccd4e10e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -15186,7 +15186,11 @@ private QueryResultsCache.LookupInfo createLookupInfoForQuery(ASTNode astNode) t
String queryString = getQueryStringForCache(astNode);
if (queryString != null) {
ValidTxnWriteIdList writeIdList = getQueryValidTxnWriteIdList();
- lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> writeIdList);
+ Set<Long> involvedTables = tablesFromReadEntities(inputs).stream()
+ .map(Table::getTTable)
+ .map(org.apache.hadoop.hive.metastore.api.Table::getId)
+ .collect(Collectors.toSet());
+ lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> writeIdList, involvedTables);
}
return lookupInfo;
}
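
On the analyzer side the change is symmetric: the ids of all tables read by the query are collected at compile time and handed to the lookup. A rough end-to-end illustration using the simplified sketch above follows; the id values are hypothetical, not taken from a real metastore.

import java.util.Set;

class LookupUsageSketch {
  public static void main(String[] args) {
    // Ids the cached entry was created with (table "author" had id 41 at that time).
    Set<Long> cachedEntryTableIds = Set.of(41L);
    // Ids resolved for the same query text after the table was dropped and recreated.
    Set<Long> lookupTableIds = Set.of(57L);
    // The query text still matches, but the id does not, so the entry is rejected.
    System.out.println(TableIdMatchSketch.entryStillValid(cachedEntryTableIds, lookupTableIds)); // false
  }
}
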
diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation3.q b/ql/src/test/queries/clientpositive/results_cache_invalidation3.q
new file mode 100644
index 00000000000..9c39f897821
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_invalidation3.q
@@ -0,0 +1,16 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=false;
+set hive.fetch.task.conversion=none;
+
+CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true');
+INSERT INTO author VALUES ('Victor');
+SELECT fname FROM author;
+
+DROP TABLE author;
+
+CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true');
+INSERT INTO author VALUES ('Alexander');
+SELECT fname FROM author;
diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation4.q b/ql/src/test/queries/clientpositive/results_cache_invalidation4.q
new file mode 100644
index 00000000000..e6162a5697f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_invalidation4.q
@@ -0,0 +1,16 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
+set hive.fetch.task.conversion=none;
+
+CREATE TABLE author (fname STRING) STORED AS ORC;
+INSERT INTO author VALUES ('Victor');
+SELECT fname FROM author;
+
+DROP TABLE author;
+
+CREATE TABLE author (fname STRING) STORED AS ORC;
+INSERT INTO author VALUES ('Alexander');
+SELECT fname FROM author;
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_invalidation3.q.out b/ql/src/test/results/clientpositive/llap/results_cache_invalidation3.q.out
new file mode 100644
index 00000000000..97ffad133e3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation3.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Victor')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Victor')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Victor
+PREHOOK: query: DROP TABLE author
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@author
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: DROP TABLE author
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@author
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Alexander')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Alexander')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Alexander
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_invalidation4.q.out b/ql/src/test/results/clientpositive/llap/results_cache_invalidation4.q.out
new file mode 100644
index 00000000000..3bee8497cb6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation4.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Victor')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Victor')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Victor
+PREHOOK: query: DROP TABLE author
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@author
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: DROP TABLE author
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@author
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Alexander')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Alexander')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Alexander