This is an automated email from the ASF dual-hosted git repository.
abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7060d94843f HIVE-27328: Acid dirCache is not invalidated in TezAMs while dropping table (#6309)
7060d94843f is described below
commit 7060d94843fdbc548445db6aac84dd60b44641ee
Author: Bodor Laszlo <[email protected]>
AuthorDate: Wed Feb 18 08:56:08 2026 +0100
HIVE-27328: Acid dirCache is not invalidated in TezAMs while dropping table (#6309)
---
.../hadoop/hive/ql/exec/TableScanOperator.java | 10 +++++-
.../org/apache/hadoop/hive/ql/exec/Utilities.java | 41 ++++++++++++++++++++++
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 26 +++++++++++---
.../apache/hadoop/hive/ql/plan/TestMapWork.java | 37 +++++++++++++++++++
4 files changed, 108 insertions(+), 6 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 5b1a7a7c2a1..8a1e9676849 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -57,7 +57,7 @@
* read as part of map-reduce framework
**/
public class TableScanOperator extends Operator<TableScanDesc> implements
- Serializable, VectorizationContextRegion {
+ Serializable, VectorizationContextRegion, IConfigureJobConf {
private static final long serialVersionUID = 1L;
private VectorizationContext taskVectorizationContext;
@@ -87,6 +87,14 @@ public class TableScanOperator extends Operator<TableScanDesc> implements
private ProbeDecodeContext probeDecodeContextSet;
+ @Override
+ public void configureJobConf(JobConf job) {
+ // Safety checks: table metadata may be null in certain scenarios, particularly in test cases.
+ if (getConf() != null && getConf().getTableMetadata() != null) {
+ Utilities.setTableCreateTime(job, getConf().getTableMetadata());
+ }
+ }
+
/**
* Inner wrapper class for TS ProbeDecode optimization
*/
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index f52251beb78..3f87b123f17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -59,6 +59,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Random;
@@ -258,6 +259,7 @@ public final class Utilities {
public static final String MAPNAME = "Map ";
public static final String REDUCENAME = "Reducer ";
public static final String ENSURE_OPERATORS_EXECUTED = "ENSURE_OPERATORS_EXECUTED";
+ public static final String CREATE_TIME = "create_time";
public static final String SNAPSHOT_REF = "snapshot_ref";
@Deprecated
@@ -5094,4 +5096,43 @@ public static String getTableOrMVSuffix(Context context, boolean createTableOrMV
}
return suffix;
}
+
+ /**
+ * Stores the creation time of the given table in the provided configuration.
+ * <p>
+ * The value is written under a composite key of the form:
+ * {@code <dbName>.<tableName>.<CREATE_TIME>}.
+ * </p>
+ *
+ * @param conf
+ * configuration to store the table creation time; must not be {@code null}
+ * @param table
+ * table whose database and name are used to construct the configuration key;
+ * must not be {@code null}
+ */
+ public static void setTableCreateTime(Configuration conf, Table table) {
+ Objects.requireNonNull(table, "Cannot get table create time. Table object is expected to be non-null.");
+ String fullTableName = TableName.getDbTable(table.getDbName(), table.getTableName());
+ conf.setInt(String.format("%s.%s", fullTableName, CREATE_TIME), table.getCreateTime());
+ }
+
+ /**
+ * Retrieves the table creation time from the configuration.
+ * <p>
+ * The value is expected to be stored under the key
+ * {@code <tableName>.<CREATE_TIME>}. If the value is not present,
+ * this method returns {@code 0}.
+ * </p>
+ *
+ * @param conf
+ * configuration containing the table creation time; must not be {@code null}
+ * @param tableName
+ * fully qualified table name ({@code dbName.tableName})
+ * used to construct the configuration key
+ * @return
+ * the table creation time, or {@code 0} if not set
+ */
+ public static int getTableCreateTime(Configuration conf, String tableName) {
+ return conf.getInt(String.format("%s.%s", tableName, CREATE_TIME), 0);
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 6ddb8ab624c..00909c6d010 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3317,13 +3317,23 @@ public static AcidDirectory getAcidStateFromCache(Supplier<FileSystem> fileSyste
//dbName + tableName + dir
String key = writeIdList.getTableName() + "_" + candidateDirectory.toString();
DirInfoValue value = dirCache.getIfPresent(key);
+ int tableCreateTimeInCache = value == null ? -1 : value.getTableCreateTime();
+ int tableCreateTime = Utilities.getTableCreateTime(conf, writeIdList.getTableName());
// in case of open/aborted txns, recompute dirInfo
long[] exceptions = writeIdList.getInvalidWriteIds();
boolean recompute = (exceptions != null && exceptions.length > 0);
+ // Check whether the table was re-created after being stored in the cache.
+ // The value null check avoids a noisy log message during the initial lookup, when no cache entry exists.
+ if (value != null && tableCreateTimeInCache != tableCreateTime) {
+ LOG.info("Table {} was recreated (at: {}) since it was stored in acid cache (at: {}), invalidating entry",
+ writeIdList.getTableName(), tableCreateTime, tableCreateTimeInCache);
+ recompute = true;
+ }
+
if (recompute) {
- LOG.info("invalidating cache entry for key: {}", key);
+ LOG.info("Invalidating cache entry for key: {}", key);
dirCache.invalidate(key);
value = null;
}
@@ -3343,7 +3353,7 @@ public static AcidDirectory getAcidStateFromCache(Supplier<FileSystem> fileSyste
if (recompute || (value == null)) {
AcidDirectory dirInfo = getAcidState(fileSystem.get(), candidateDirectory, conf,
writeIdList, useFileIds, ignoreEmptyFiles);
- value = new DirInfoValue(writeIdList.writeToString(), dirInfo);
+ value = new DirInfoValue(writeIdList.writeToString(), dirInfo, tableCreateTime);
if (value.dirInfo != null && value.dirInfo.getBaseDirectory() != null
&& value.dirInfo.getCurrentDirectories().isEmpty()) {
@@ -3405,12 +3415,14 @@ public static boolean acidTableWithoutTransactions(Table table) {
}
static class DirInfoValue {
- private String txnString;
- private AcidDirectory dirInfo;
+ private final String txnString;
+ private final AcidDirectory dirInfo;
+ private final int tableCreateTime;
- DirInfoValue(String txnString, AcidDirectory dirInfo) {
+ DirInfoValue(String txnString, AcidDirectory dirInfo, int tableCreateTime) {
this.txnString = txnString;
this.dirInfo = dirInfo;
+ this.tableCreateTime = tableCreateTime;
}
String getTxnString() {
@@ -3420,6 +3432,10 @@ String getTxnString() {
AcidDirectory getDirInfo() {
return dirInfo;
}
+
+ int getTableCreateTime() {
+ return tableCreateTime;
+ }
}
public static String getPartitionName(Map<String, String> partitionSpec) throws SemanticException {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
index 7a0bd7177dc..33fa890b16e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
@@ -26,7 +26,13 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.mapred.JobConf;
import org.junit.Test;
@@ -69,4 +75,35 @@ public void testDeriveLlapSetsCacheAffinityForTextInputFormat() {
mapWork.getCacheAffinity());
}
+ @Test
+ public void testConfigureJobConfPropagatesTableCreateTime() {
+ // Given a table with a realistic create time
+ String dbName = "test_db";
+ String tableName = "test_table";
+ int createTime = 1770653453;
+
+ Table table = new Table(dbName, tableName);
+ table.setCreateTime(createTime);
+
+ // And a TableScanOperator configured for that table
+ TableScanDesc tsDesc = new TableScanDesc(table);
+ CompilationOpContext cCtx = new CompilationOpContext();
+ TableScanOperator tsOp = new TableScanOperator(cCtx);
+ tsOp.setConf(tsDesc);
+
+ // And a MapWork that uses this TableScanOperator as a root
+ MapWork mapWork = new MapWork();
+ mapWork.getAliasToWork().put("t", tsOp);
+
+ JobConf jobConf = new JobConf();
+
+ // When configuring the job from the MapWork
+ mapWork.configureJobConf(jobConf);
+
+ // Then the table's create time should be present in the JobConf
+ String fullTableName = TableName.getDbTable(dbName, tableName);
+ assertEquals(
+ createTime,
+ Utilities.getTableCreateTime(jobConf, fullTableName));
+ }
}