This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 751fd73db24 HIVE-28551: Stale results when executing queries over 
recreated tables (#6064)
751fd73db24 is described below

commit 751fd73db24f4b7e0d34dde2ff698bd8421e83f3
Author: Thomas Rebele <[email protected]>
AuthorDate: Fri Sep 12 10:40:03 2025 +0200

    HIVE-28551: Stale results when executing queries over recreated tables 
(#6064)
    
    Co-authored-by: czxm <[email protected]>
---
 .../hive/ql/cache/results/QueryResultsCache.java   | 23 +++++---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  6 ++-
 .../clientpositive/results_cache_invalidation3.q   | 16 ++++++
 .../clientpositive/results_cache_invalidation4.q   | 16 ++++++
 .../llap/results_cache_invalidation3.q.out         | 62 ++++++++++++++++++++++
 .../llap/results_cache_invalidation4.q.out         | 62 ++++++++++++++++++++++
 6 files changed, 178 insertions(+), 7 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java 
b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
index b613df1df44..6713219d2c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
@@ -65,7 +65,6 @@
 import org.apache.hadoop.hive.ql.hooks.Entity.Type;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.events.EventConsumer;
@@ -86,13 +85,15 @@ public final class QueryResultsCache {
   private static final Logger LOG = 
LoggerFactory.getLogger(QueryResultsCache.class);
 
   public static class LookupInfo {
-    private String queryText;
-    private Supplier<ValidTxnWriteIdList> txnWriteIdListProvider;
+    private final String queryText;
+    private final Supplier<ValidTxnWriteIdList> txnWriteIdListProvider;
+    private final Set<Long> tableIds;
 
-    public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> 
txnWriteIdListProvider) {
+    public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> 
txnWriteIdListProvider, Set<Long> tableIds) {
       super();
       this.queryText = queryText;
       this.txnWriteIdListProvider = txnWriteIdListProvider;
+      this.tableIds = tableIds;
     }
 
     public String getQueryText() {
@@ -429,7 +430,7 @@ public CacheEntry lookup(LookupInfo request) {
     Set<CacheEntry> entriesToRemove = new HashSet<CacheEntry>();
     Lock readLock = rwLock.readLock();
     try {
-      // Note: ReentrantReadWriteLock deos not allow upgrading a read lock to 
a write lock.
+      // Note: ReentrantReadWriteLock does not allow upgrading a read lock to 
a write lock.
       // Care must be taken while under read lock, to make sure we do not 
perform any actions
       // which attempt to take a write lock.
       readLock.lock();
@@ -671,10 +672,20 @@ public void notifyTableChanged(String dbName, String 
tableName, long updateTime)
    */
   private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, 
Set<CacheEntry> entriesToRemove) {
     QueryInfo queryInfo = entry.getQueryInfo();
+
     for (ReadEntity readEntity : queryInfo.getInputs()) {
-      // Check that the tables used do not resolve to temp tables.
       if (readEntity.getType() == Type.TABLE) {
         Table tableUsed = readEntity.getTable();
+        // we want that the lookupInfo.tableIds are all covered by the table 
ids of the cache entry
+        // the query is used as cache key, so the lookup and the entry should 
use the same number of tables
+        // so it is enough to check whether every cache table id is contained 
in the lookup
+        long id = tableUsed.getTTable().getId();
+        if (!lookupInfo.tableIds.contains(id)) {
+          LOG.debug("Cache entry contains a table (tableId={}) that is not 
present in the query", id);
+          return false;
+        }
+
+        // Check that the tables used do not resolve to temp tables.
         Map<String, Table> tempTables =
             
SessionHiveMetaStoreClient.getTempTablesForDatabase(tableUsed.getDbName(), 
tableUsed.getTableName());
         if (tempTables != null && 
tempTables.containsKey(tableUsed.getTableName())) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 170d0f4e473..1eccd4e10e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -15186,7 +15186,11 @@ private QueryResultsCache.LookupInfo 
createLookupInfoForQuery(ASTNode astNode) t
     String queryString = getQueryStringForCache(astNode);
     if (queryString != null) {
       ValidTxnWriteIdList writeIdList = getQueryValidTxnWriteIdList();
-      lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> 
writeIdList);
+      Set<Long> involvedTables = tablesFromReadEntities(inputs).stream()
+          .map(Table::getTTable)
+          .map(org.apache.hadoop.hive.metastore.api.Table::getId)
+          .collect(Collectors.toSet());
+      lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> 
writeIdList, involvedTables);
     }
     return lookupInfo;
   }
diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation3.q 
b/ql/src/test/queries/clientpositive/results_cache_invalidation3.q
new file mode 100644
index 00000000000..9c39f897821
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_invalidation3.q
@@ -0,0 +1,16 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=false;
+set hive.fetch.task.conversion=none;
+
+CREATE TABLE author (fname STRING) STORED AS ORC 
TBLPROPERTIES('transactional'='true');
+INSERT INTO author VALUES ('Victor');
+SELECT fname FROM author;
+
+DROP TABLE author;
+
+CREATE TABLE author (fname STRING) STORED AS ORC 
TBLPROPERTIES('transactional'='true');
+INSERT INTO author VALUES ('Alexander');
+SELECT fname FROM author;
diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation4.q 
b/ql/src/test/queries/clientpositive/results_cache_invalidation4.q
new file mode 100644
index 00000000000..e6162a5697f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_invalidation4.q
@@ -0,0 +1,16 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
+set hive.fetch.task.conversion=none;
+
+CREATE TABLE author (fname STRING) STORED AS ORC;
+INSERT INTO author VALUES ('Victor');
+SELECT fname FROM author;
+
+DROP TABLE author;
+
+CREATE TABLE author (fname STRING) STORED AS ORC;
+INSERT INTO author VALUES ('Alexander');
+SELECT fname FROM author;
diff --git 
a/ql/src/test/results/clientpositive/llap/results_cache_invalidation3.q.out 
b/ql/src/test/results/clientpositive/llap/results_cache_invalidation3.q.out
new file mode 100644
index 00000000000..97ffad133e3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation3.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Victor')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Victor')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Victor
+PREHOOK: query: DROP TABLE author
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@author
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: DROP TABLE author
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@author
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Alexander')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Alexander')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Alexander
diff --git 
a/ql/src/test/results/clientpositive/llap/results_cache_invalidation4.q.out 
b/ql/src/test/results/clientpositive/llap/results_cache_invalidation4.q.out
new file mode 100644
index 00000000000..3bee8497cb6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation4.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Victor')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Victor')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Victor
+PREHOOK: query: DROP TABLE author
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@author
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: DROP TABLE author
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@author
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: INSERT INTO author VALUES ('Alexander')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@author
+POSTHOOK: query: INSERT INTO author VALUES ('Alexander')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@author
+POSTHOOK: Lineage: author.fname SCRIPT []
+PREHOOK: query: SELECT fname FROM author
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT fname FROM author
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+Alexander

Reply via email to