vihangk1 commented on a change in pull request #1095:
URL: https://github.com/apache/hive/pull/1095#discussion_r439632706
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -392,7 +395,12 @@ private static Hive getInternal(HiveConf c, boolean
needsRefresh, boolean isFast
}
db = create(c, doRegisterAllFns);
}
- if (c != null) {
+ if (c != null && db.conf != null && db.conf != c) {
+ if (db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY) != null) {
+ c.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY,
db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
Review comment:
Also, adding debug logs here would be useful if we end up overwriting
anything here.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -392,7 +395,12 @@ private static Hive getInternal(HiveConf c, boolean
needsRefresh, boolean isFast
}
db = create(c, doRegisterAllFns);
}
- if (c != null) {
+ if (c != null && db.conf != null && db.conf != c) {
+ if (db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY) != null) {
+ c.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY,
db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
Review comment:
Is there a chance of overwriting VALID_TABLES_WRITEIDS_KEY in the
HiveConf object here? Is it possible that c already has some
VALID_TABLES_WRITEIDS_KEY set here?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -795,7 +804,13 @@ public void alterTable(String catName, String dbName,
String tblName, Table newT
// Take a table snapshot and set it to newTbl.
AcidUtils.TableSnapshot tableSnapshot = null;
if (transactional) {
- if (replWriteId > 0) {
+ if (AcidUtils.isTransactionalTable(newTbl) && !inReplication(newTbl)
&& replWriteId > 0) {
+ txnOpened = openTxnIfNeeded();
Review comment:
+1 to Peter's comment. Opening a transaction during compilation makes more
sense unless there is a good reason to do it here.
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
##########
@@ -409,7 +409,14 @@ private boolean isTargetTable(Entity entity, Table
targetTable) {
* is this the right way to compare? Should it just compare paths?
* equals() impl looks heavy weight
*/
- return targetTable.equals(entity.getTable());
+ long targetWriteId = targetTable.getTTable().getWriteId();
Review comment:
Can you add a comment here as to why we need to ignore the writeId here?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -392,7 +395,12 @@ private static Hive getInternal(HiveConf c, boolean
needsRefresh, boolean isFast
}
db = create(c, doRegisterAllFns);
}
- if (c != null) {
+ if (c != null && db.conf != null && db.conf != c) {
+ if (db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY) != null) {
+ c.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY,
db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
+ } else {
+ c.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
Review comment:
Adding a debug log here would be useful if c already has
VALID_TABLES_WRITEIDS_KEY set.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
##########
@@ -2425,6 +2425,42 @@ public static TableSnapshot
getTableSnapshot(Configuration conf,
validWriteIdList != null ? validWriteIdList.toString() : null);
}
+ /**
+ * This is called by Hive.java for all write operations (DDL). Advance write
id
+ * for the table via transaction manager, and store it in config. The write
id
+ * will be marked as committed instantly in config, as all DDL are auto
+ * committed, there's no chance to rollback.
+ */
+ public static ValidWriteIdList advanceWriteId(HiveConf conf, Table tbl)
throws LockException {
+ if (!isTransactionalTable(tbl)) {
+ return null;
+ }
+ HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
+ long writeId =
SessionState.get().getTxnMgr().getTableWriteId(tbl.getDbName(),
tbl.getTableName());
Review comment:
I think it's worth adding a comment here to say that this call allocates
a writeId to the table if one has not been allocated already.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -5057,7 +5165,20 @@ public static boolean isHadoop1() {
public List<Partition> exchangeTablePartitions(Map<String, String>
partitionSpecs,
String sourceDb, String sourceTable, String destDb,
String destinationTableName) throws HiveException {
+ boolean txnOpened = false;
try {
+ Table srcTbl = getTable(sourceDb, sourceTable);
+ if (AcidUtils.isTransactionalTable(srcTbl) && !inReplication(srcTbl)) {
Review comment:
Seems like this code block is repeated many times. Perhaps refactoring
it into one helper method would make it easier to read and maintain.
##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java
##########
@@ -102,14 +102,14 @@ private void addPartition(boolean isVectorized) throws
Exception {
String testQuery = isVectorized ? "select ROW__ID, p, a, b from T order by
p, ROW__ID" :
"select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p,
ROW__ID";
String[][] expected = new String[][]{
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2",
- "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4",
- "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t0\t2",
- "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4",
- "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}};
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2",
Review comment:
Just for my understanding, does the compactor handle such cases correctly?
E.g., delta_0000002_0000002_0000, delta_0000004_0000004_0000 is compacted to
delta_0000002_0000004_v123?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -6151,4 +6370,36 @@ public StorageHandlerInfo getStorageHandlerInfo(Table
table)
throw new HiveException(e);
}
}
+
+ private boolean openTxnIfNeeded() throws HiveException {
+ try {
+ if (SessionState.get().getTxnMgr() == null) {
+ SessionState.get().initTxnMgr(conf);
+ }
+ HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
+ if (!txnMgr.isTxnOpen()) {
+ Context ctx = new Context(conf);
+ txnMgr.openTxn(ctx, SessionState.getUserFromAuthenticator());
+ return true;
+ }
+ return false;
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ }
+
+ public void clearValidWriteIdList() {
Review comment:
I couldn't locate where this is getting called. Can you document when
this method is called and its intention?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -1230,14 +1300,31 @@ public void dropTable(String dbName, String tableName,
boolean deleteData,
*/
public void dropTable(String dbName, String tableName, boolean deleteData,
boolean ignoreUnknownTab, boolean ifPurge) throws HiveException {
+ boolean txnOpened = false;
try {
+ Table tbl = null;
+ try {
+ tbl = getTable(dbName, tableName);
+ } catch (InvalidTableException e) {
+ }
+ if (tbl != null && AcidUtils.isTransactionalTable(tbl) &&
!inReplication(tbl)) {
+ txnOpened = openTxnIfNeeded();
+ // Advance writeId for ddl on transactional table
+ AcidUtils.advanceWriteId(conf, tbl);
Review comment:
It looks like we will be marking the writeId for this table as committed
even if we didn't allocate it for this operation. Will that cause any other
side effects? Should we mark it as committed only when we allocated a new
writeId for that table?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]