This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7060d94843f HIVE-27328: Acid dirCache is not invalidated in TezAMs 
while dropping table (#6309)
7060d94843f is described below

commit 7060d94843fdbc548445db6aac84dd60b44641ee
Author: Bodor Laszlo <[email protected]>
AuthorDate: Wed Feb 18 08:56:08 2026 +0100

    HIVE-27328: Acid dirCache is not invalidated in TezAMs while dropping table 
(#6309)
---
 .../hadoop/hive/ql/exec/TableScanOperator.java     | 10 +++++-
 .../org/apache/hadoop/hive/ql/exec/Utilities.java  | 41 ++++++++++++++++++++++
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java    | 26 +++++++++++---
 .../apache/hadoop/hive/ql/plan/TestMapWork.java    | 37 +++++++++++++++++++
 4 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 5b1a7a7c2a1..8a1e9676849 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -57,7 +57,7 @@
  * read as part of map-reduce framework
  **/
 public class TableScanOperator extends Operator<TableScanDesc> implements
-    Serializable, VectorizationContextRegion {
+    Serializable, VectorizationContextRegion, IConfigureJobConf {
   private static final long serialVersionUID = 1L;
 
   private VectorizationContext taskVectorizationContext;
@@ -87,6 +87,14 @@ public class TableScanOperator extends 
Operator<TableScanDesc> implements
 
   private ProbeDecodeContext probeDecodeContextSet;
 
+  @Override
+  public void configureJobConf(JobConf job) {
+    // Safety checks: table metadata may be null in certain scenarios, 
particularly in test cases.
+    if (getConf() != null && getConf().getTableMetadata() != null) {
+      Utilities.setTableCreateTime(job, getConf().getTableMetadata());
+    }
+  }
+
   /**
    * Inner wrapper class for TS ProbeDecode optimization
    */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index f52251beb78..3f87b123f17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -59,6 +59,7 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.Random;
@@ -258,6 +259,7 @@ public final class Utilities {
   public static final String MAPNAME = "Map ";
   public static final String REDUCENAME = "Reducer ";
   public static final String ENSURE_OPERATORS_EXECUTED = 
"ENSURE_OPERATORS_EXECUTED";
+  public static final String CREATE_TIME = "create_time";
   public static final String SNAPSHOT_REF = "snapshot_ref";
 
   @Deprecated
@@ -5094,4 +5096,43 @@ public static String getTableOrMVSuffix(Context context, 
boolean createTableOrMV
     }
     return suffix;
   }
+
+  /**
+   * Stores the creation time of the given table in the provided configuration.
+   * <p>
+   * The value is written under a composite key of the form:
+   * {@code &lt;dbName&gt;.&lt;tableName&gt;.&lt;CREATE_TIME&gt;}.
+   * </p>
+   *
+   * @param conf
+   *     configuration to store the table creation time; must not be {@code 
null}
+   * @param table
+   *     table whose database and name are used to construct the configuration 
key;
+   *     must not be {@code null}
+   */
+  public static void setTableCreateTime(Configuration conf, Table table) {
+    Objects.requireNonNull(table, "Cannot get table create time. Table object 
is expected to be non-null.");
+    String fullTableName = TableName.getDbTable(table.getDbName(), 
table.getTableName());
+    conf.setInt(String.format("%s.%s", fullTableName, CREATE_TIME), 
table.getCreateTime());
+  }
+
+  /**
+   * Retrieves the table creation time from the configuration.
+   * <p>
+   * The value is expected to be stored under the composite key
+   * {@code &lt;dbName&gt;.&lt;tableName&gt;.&lt;CREATE_TIME&gt;}. If the value is not
+   * present, this method returns {@code 0}.
+   * </p>
+   *
+   * @param conf
+   *     configuration containing the table creation time; must not be {@code 
null}
+   * @param tableName
+   *     fully qualified table name ({@code dbName.tableName})
+   *     used to construct the configuration key
+   * @return
+   *     the table creation time, or {@code 0} if not set
+   */
+  public static int getTableCreateTime(Configuration conf, String tableName) {
+    return conf.getInt(String.format("%s.%s", tableName, CREATE_TIME), 0);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 6ddb8ab624c..00909c6d010 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3317,13 +3317,23 @@ public static AcidDirectory 
getAcidStateFromCache(Supplier<FileSystem> fileSyste
     //dbName + tableName + dir
     String key = writeIdList.getTableName() + "_" + 
candidateDirectory.toString();
     DirInfoValue value = dirCache.getIfPresent(key);
+    int tableCreateTimeInCache = value == null ? -1 : 
value.getTableCreateTime();
+    int tableCreateTime = Utilities.getTableCreateTime(conf, 
writeIdList.getTableName());
 
     // in case of open/aborted txns, recompute dirInfo
     long[] exceptions = writeIdList.getInvalidWriteIds();
     boolean recompute = (exceptions != null && exceptions.length > 0);
 
+    // Check whether the table was re-created after being stored in the cache.
+    // The value null check avoids a noisy log message during the initial 
lookup, when no cache entry exists.
+    if (value != null && tableCreateTimeInCache != tableCreateTime) {
+      LOG.info("Table {} was recreated (at: {}) since it was stored in acid 
cache (at: {}), invalidating entry",
+          writeIdList.getTableName(), tableCreateTime, tableCreateTimeInCache);
+      recompute = true;
+    }
+
     if (recompute) {
-      LOG.info("invalidating cache entry for key: {}", key);
+      LOG.info("Invalidating cache entry for key: {}", key);
       dirCache.invalidate(key);
       value = null;
     }
@@ -3343,7 +3353,7 @@ public static AcidDirectory 
getAcidStateFromCache(Supplier<FileSystem> fileSyste
     if (recompute || (value == null)) {
       AcidDirectory dirInfo = getAcidState(fileSystem.get(), 
candidateDirectory, conf,
           writeIdList, useFileIds, ignoreEmptyFiles);
-      value = new DirInfoValue(writeIdList.writeToString(), dirInfo);
+      value = new DirInfoValue(writeIdList.writeToString(), dirInfo, 
tableCreateTime);
 
       if (value.dirInfo != null && value.dirInfo.getBaseDirectory() != null
           && value.dirInfo.getCurrentDirectories().isEmpty()) {
@@ -3405,12 +3415,14 @@ public static boolean 
acidTableWithoutTransactions(Table table) {
   }
 
   static class DirInfoValue {
-    private String txnString;
-    private AcidDirectory dirInfo;
+    private final String txnString;
+    private final AcidDirectory dirInfo;
+    private final int tableCreateTime;
 
-    DirInfoValue(String txnString, AcidDirectory dirInfo) {
+    DirInfoValue(String txnString, AcidDirectory dirInfo, int tableCreateTime) 
{
       this.txnString = txnString;
       this.dirInfo = dirInfo;
+      this.tableCreateTime = tableCreateTime;
     }
 
     String getTxnString() {
@@ -3420,6 +3432,10 @@ String getTxnString() {
     AcidDirectory getDirInfo() {
       return dirInfo;
     }
+
+    int getTableCreateTime() {
+      return tableCreateTime;
+    }
   }
 
   public static String getPartitionName(Map<String, String> partitionSpec) 
throws SemanticException {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java 
b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
index 7a0bd7177dc..33fa890b16e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
@@ -26,7 +26,13 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.mapred.JobConf;
 
 import org.junit.Test;
 
@@ -69,4 +75,35 @@ public void 
testDeriveLlapSetsCacheAffinityForTextInputFormat() {
         mapWork.getCacheAffinity());
   }
 
+  @Test
+  public void testConfigureJobConfPropagatesTableCreateTime() {
+    // Given a table with a realistic create time
+    String dbName = "test_db";
+    String tableName = "test_table";
+    int createTime = 1770653453;
+
+    Table table = new Table(dbName, tableName);
+    table.setCreateTime(createTime);
+
+    // And a TableScanOperator configured for that table
+    TableScanDesc tsDesc = new TableScanDesc(table);
+    CompilationOpContext cCtx = new CompilationOpContext();
+    TableScanOperator tsOp = new TableScanOperator(cCtx);
+    tsOp.setConf(tsDesc);
+
+    // And a MapWork that uses this TableScanOperator as a root
+    MapWork mapWork = new MapWork();
+    mapWork.getAliasToWork().put("t", tsOp);
+
+    JobConf jobConf = new JobConf();
+
+    // When configuring the job from the MapWork
+    mapWork.configureJobConf(jobConf);
+
+    // Then the table's create time should be present in the JobConf
+    String fullTableName = TableName.getDbTable(dbName, tableName);
+    assertEquals(
+        createTime,
+        Utilities.getTableCreateTime(jobConf, fullTableName));
+  }
 }

Reply via email to