Repository: hive
Updated Branches:
  refs/heads/master 0992d8292 -> 13fbae573
HIVE-19750: Initialize NEXT_WRITE_ID. NWI_NEXT on converting an existing table to full acid (Eugene Koifman, reviewed by Sankar Hariappan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/13fbae57 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/13fbae57 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/13fbae57 Branch: refs/heads/master Commit: 13fbae57321f3525cabb326df702430d61c242f9 Parents: 0992d82 Author: Eugene Koifman <ekoif...@apache.org> Authored: Wed Jun 6 10:15:04 2018 -0700 Committer: Eugene Koifman <ekoif...@apache.org> Committed: Wed Jun 6 10:15:04 2018 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/TestTxnCommands.java | 19 ++-- .../apache/hadoop/hive/ql/TestTxnCommands2.java | 18 ++-- .../apache/hadoop/hive/ql/TestTxnLoadData.java | 39 ++++++--- .../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 73 ++++++++++------ .../llap/acid_vectorization_original.q.out | 14 +-- .../tez/acid_vectorization_original_tez.q.out | 14 +-- .../TransactionalValidationListener.java | 11 +++ .../api/InitializeTableWriteIdsRequest.java | 42 +++++++++ .../hadoop/hive/metastore/txn/TxnHandler.java | 92 ++++++++++---------- .../hadoop/hive/metastore/txn/TxnStore.java | 6 ++ 10 files changed, 211 insertions(+), 117 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index a4d34a7..cd4b670 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -897,12 +897,14 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/000001_0_copy_1")); Assert.assertTrue(rs.get(3), - rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17")); - Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/delta_0000001_0000001_0000/bucket_00001")); + rs.get(3).startsWith("{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17")); + Assert.assertTrue(rs.get(3), rs.get(3) + .endsWith("nonacidorctbl/delta_10000001_10000001_0000/bucket_00001")); //run Compaction runStatementOnDriver("alter table "+ TestTxnCommands2.Table.NONACIDORCTBL +" compact 'major'"); TestTxnCommands2.runWorker(hiveConf); - rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.NONACIDORCTBL + " order by ROW__ID"); + rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from " + + Table.NONACIDORCTBL + " order by ROW__ID"); LOG.warn("after compact"); for(String s : rs) { LOG.warn(s); @@ -910,16 +912,17 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { Assert.assertEquals("", 4, rs.size()); Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2")); - Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_0000001/bucket_00001")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_10000001/bucket_00001")); Assert.assertTrue(rs.get(1), 
rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5")); - Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_0000001/bucket_00001")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_10000001/bucket_00001")); Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12")); - Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_0000001/bucket_00001")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_10000001/bucket_00001")); Assert.assertTrue(rs.get(3), - rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17")); - Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_0000001/bucket_00001")); + rs.get(3) + .startsWith("{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_10000001/bucket_00001")); //make sure they are the same before and after compaction } http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index a547a84..7c201b6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -379,14 +379,14 @@ public class TestTxnCommands2 { */ String[][] expected = { {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t0\t13", "bucket_00001"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"}, - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"}, + {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"}, + {"{\"writeid\":10000003,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"}, + {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4", "bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "bucket_00001"}, {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":6}\t1\t6", "bucket_00001"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"} + {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"} }; Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size()); for(int i = 0; i < expected.length; i++) { @@ -773,11 +773,11 @@ public class TestTxnCommands2 { FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(buckets); if (numDelta == 1) { - Assert.assertEquals("delta_0000001_0000001_0000", status[i].getPath().getName()); + Assert.assertEquals("delta_10000001_10000001_0000", status[i].getPath().getName()); Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } else if (numDelta == 2) { - Assert.assertEquals("delta_0000002_0000002_0000", status[i].getPath().getName()); + Assert.assertEquals("delta_10000002_10000002_0000", status[i].getPath().getName()); Assert.assertEquals(1, buckets.length); 
Assert.assertEquals("bucket_00000", buckets[0].getPath().getName()); } @@ -786,7 +786,7 @@ public class TestTxnCommands2 { FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(buckets); if (numDeleteDelta == 1) { - Assert.assertEquals("delete_delta_0000001_0000001_0000", status[i].getPath().getName()); + Assert.assertEquals("delete_delta_10000001_10000001_0000", status[i].getPath().getName()); Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } @@ -833,7 +833,7 @@ public class TestTxnCommands2 { Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } else if (numBase == 2) { // The new base dir now has two bucket files, since the delta dir has two bucket files - Assert.assertEquals("base_0000002", status[i].getPath().getName()); + Assert.assertEquals("base_10000002", status[i].getPath().getName()); Assert.assertEquals(2, buckets.length); Assert.assertEquals("bucket_00000", buckets[0].getPath().getName()); } @@ -859,7 +859,7 @@ public class TestTxnCommands2 { status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); Assert.assertEquals(1, status.length); - Assert.assertEquals("base_0000002", status[0].getPath().getName()); + Assert.assertEquals("base_10000002", status[0].getPath().getName()); FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); Arrays.sort(buckets); Assert.assertEquals(2, buckets.length); http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index 11c5930..fb88f25 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -263,12 +263,18 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests { {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/000000_0"}, //from Load Data into acid converted table - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t2", "t/delta_0000001_0000001_0000/000001_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3", "t/delta_0000001_0000001_0000/000001_0"}, - {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t4\t4", "t/delta_0000001_0000001_0000/000002_0"}, - {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t5\t5", "t/delta_0000001_0000001_0000/000002_0"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + "t/delta_10000001_10000001_0000/000000_0"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + "t/delta_10000001_10000001_0000/000000_0"}, + {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t2\t2", + "t/delta_10000001_10000001_0000/000001_0"}, + {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t3\t3", + "t/delta_10000001_10000001_0000/000001_0"}, + {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":0}\t4\t4", + 
"t/delta_10000001_10000001_0000/000002_0"}, + {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":1}\t5\t5", + "t/delta_10000001_10000001_0000/000002_0"}, }; checkResult(expected, testQuery, isVectorized, "load data inpath"); @@ -279,9 +285,12 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests { runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T"); String[][] expected2 = new String[][] { - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000002/000000_0"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000002/000000_0"}, - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000002/000001_0"} + {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t5\t6", + "t/base_10000002/000000_0"}, + {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_10000002/000000_0"}, + {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t8\t8", + + "t/base_10000002/000001_0"} }; checkResult(expected2, testQuery, isVectorized, "load data inpath overwrite"); @@ -291,10 +300,14 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests { TestTxnCommands2.runWorker(hiveConf); String[][] expected3 = new String[][] { - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000003/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000003/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000003/bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t9\t9", "t/base_0000003/bucket_00000"} + {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t5\t6", + "t/base_10000003/bucket_00000"}, + {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":1}\t7\t8", + "t/base_10000003/bucket_00000"}, + {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t8\t8", + "t/base_10000003/bucket_00001"}, + {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t9\t9", + "t/base_10000003/bucket_00000"} }; checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite (major)"); } http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index c15c5a6..f071531 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -375,7 +375,7 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_0000001_0000001_0000/bucket_00000"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_10000001_10000001_0000/bucket_00000"}, }; rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b, INPUT__FILE__NAME"); checkExpected(rs, expected3,"after converting to acid (no compaction with updates)"); @@ -387,15 +387,24 @@ 
ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver /*Compaction preserves location of rows wrt buckets/tranches (for now)*/ String expected4[][] = { - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/base_0000002/bucket_00002"}, - {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/base_0000002/bucket_00002"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/base_0000002/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/base_0000002/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/base_0000002/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/base_0000002/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_0000002/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/base_0000002/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/base_0000002/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", + "warehouse/t/base_10000002/bucket_00002"}, + {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", + "warehouse/t/base_10000002/bucket_00002"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", + "warehouse/t/base_10000002/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", + "warehouse/t/base_10000002/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", + "warehouse/t/base_10000002/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", + "warehouse/t/base_10000002/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", + "warehouse/t/base_10000002/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", + "warehouse/t/base_10000002/bucket_00000"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t60\t88", + "warehouse/t/base_10000002/bucket_00000"}, }; checkExpected(rs, expected4,"after major compact"); } @@ -467,15 +476,24 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver * Also check the file name (only) after compaction for completeness */ String[][] expected = { - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13", "bucket_00000", "000000_0_copy_1"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000", "bucket_00000"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000", "bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000", "bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "bucket_00000", "000000_0"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4", "bucket_00000", "000000_0_copy_1"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5", "bucket_00000", "000000_0_copy_1"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6", "bucket_00000", "000000_0_copy_2"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t16", "bucket_00000", "bucket_00000"} + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13", + "bucket_00000", "000000_0_copy_1"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t0\t15", + "bucket_00000", "bucket_00000"}, + {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t0\t17", + "bucket_00000", 
"bucket_00000"}, + {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t0\t120", + "bucket_00000", "bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + "bucket_00000", "000000_0"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4", + "bucket_00000", "000000_0_copy_1"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5", + "bucket_00000", "000000_0_copy_1"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6", + "bucket_00000", "000000_0_copy_2"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":1}\t1\t16", + "bucket_00000", "bucket_00000"} }; Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size()); for(int i = 0; i < expected.length; i++) { @@ -620,7 +638,7 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver query = "select ROW__ID, b from T where b > 0 order by a"; rs = runStatementOnDriver(query); String[][] expected4 = { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}","17"}, + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}","17"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}","4"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}","6"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}","8"}, @@ -641,11 +659,16 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b"; rs = runStatementOnDriver(query); String[][] expected5 = {//the row__ids are the same after compaction - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000"}, - {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000"} + {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t17", + "warehouse/t/base_10000001/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", + "warehouse/t/base_10000001/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", + "warehouse/t/base_10000001/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", + "warehouse/t/base_10000001/bucket_00000"}, + {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", + "warehouse/t/base_10000001/bucket_00000"} }; checkExpected(rs, expected5, "After major compaction"); //vectorized because there is INPUT__FILE__NAME http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index dafd5d9..957dfd8 100644 --- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -665,22 +665,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed - Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247 Data size: 713720 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: ROW__ID - Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) - Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: may be used (ACID table) @@ -692,13 +692,13 @@ STAGE PLANS: keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out index 01ec132..3c9cf03 100644 --- a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out +++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out @@ -680,22 +680,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed_n0 - Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: ROW__ID - Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 52332 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) - Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -704,13 +704,13 @@ STAGE PLANS: keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
index c3d99c3..56da115 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.metastore.api.InitializeTableWriteIdsRequest;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.events.PreAlterTableEvent;
 import org.apache.hadoop.hive.metastore.events.PreCreateTableEvent;
 import org.apache.hadoop.hive.metastore.events.PreEventContext;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.slf4j.Logger;
@@ -190,6 +192,15 @@ public final class TransactionalValidationListener extends MetaStorePreEventList
       }
     }
     checkSorted(newTable);
+    if(TxnUtils.isAcidTable(newTable) && !TxnUtils.isAcidTable(oldTable)) {
+      /* we just made an existing table full acid which wasn't acid before and it passed all checks
+      initialize the Write ID sequence so that we can handle assigning ROW_IDs to 'original'
+      files already present in the table. */
+      TxnStore t = TxnUtils.getTxnStore(getConf());
+      //For now assume no partition may have > 10M files. Perhaps better to count them.
+      t.seedWriteIdOnAcidConversion(new InitializeTableWriteIdsRequest(newTable.getDbName(),
+          newTable.getTableName(), 10000000));
+    }
   }
 
   private void checkSorted(Table newTable) throws MetaException {

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
new file mode 100644
index 0000000..d56b66a
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.api;
+
+public class InitializeTableWriteIdsRequest {
+  private final String dbName;
+  private final String tblName;
+  private final long seeWriteId;
+  public InitializeTableWriteIdsRequest(String dbName, String tblName, long seeWriteId) {
+    assert dbName != null;
+    assert tblName != null;
+    assert seeWriteId > 1;
+    this.dbName = dbName;
+    this.tblName = tblName;
+    this.seeWriteId = seeWriteId;
+  }
+  public String getDbName() {
+    return dbName;
+  }
+  public String getTblName() {
+    return tblName;
+  }
+
+  public long getSeeWriteId() {
+    return seeWriteId;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index d1b0d32..f25e77a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -67,54 +67,7 @@ import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockHandler;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.MetaStoreListenerNotifier;
 import org.apache.hadoop.hive.metastore.TransactionalMetaStoreEventListener;
-import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
-import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
-import org.apache.hadoop.hive.metastore.api.AddDynamicPartitions;
-import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest;
-import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsResponse;
-import org.apache.hadoop.hive.metastore.api.BasicTxnInfo;
-import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
-import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionResponse;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
-import org.apache.hadoop.hive.metastore.api.DataOperationType;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest;
-import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsResponse;
-import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
-import org.apache.hadoop.hive.metastore.api.HiveObjectType;
-import org.apache.hadoop.hive.metastore.api.LockComponent;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
-import org.apache.hadoop.hive.metastore.api.LockState;
-import org.apache.hadoop.hive.metastore.api.LockType;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.ReplTblWriteIdStateRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
-import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
-import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnInfo;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
-import org.apache.hadoop.hive.metastore.api.TxnState;
-import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
-import org.apache.hadoop.hive.metastore.api.UnlockRequest;
+import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
 import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider;
@@ -1537,7 +1490,50 @@
       return allocateTableWriteIds(rqst);
     }
   }
+  @Override
+  public void seedWriteIdOnAcidConversion(InitializeTableWriteIdsRequest rqst)
+      throws MetaException {
+    try {
+      Connection dbConn = null;
+      Statement stmt = null;
+      TxnStore.MutexAPI.LockHandle handle = null;
+      try {
+        lockInternal();
+        dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
+        stmt = dbConn.createStatement();
+
+        handle = getMutexAPI().acquireLock(MUTEX_KEY.WriteIdAllocator.name());
+        //since this is on conversion from non-acid to acid, NEXT_WRITE_ID should not have an entry
+        //for this table. It also has a unique index in case 'should not' is violated
+
+        // First allocation of write id should add the table to the next_write_id meta table
+        // The initial value for write id should be 1 and hence we add 1 with number of write ids
+        // allocated here
+        String s = "insert into NEXT_WRITE_ID (nwi_database, nwi_table, nwi_next) values ("
+          + quoteString(rqst.getDbName()) + "," + quoteString(rqst.getTblName()) + ","
+          + Long.toString(rqst.getSeeWriteId() + 1) + ")";
+        LOG.debug("Going to execute insert <" + s + ">");
+        stmt.execute(s);
+        LOG.debug("Going to commit");
+        dbConn.commit();
+      } catch (SQLException e) {
+        LOG.debug("Going to rollback");
+        rollbackDBConn(dbConn);
+        checkRetryable(dbConn, e, "seedWriteIdOnAcidConversion(" + rqst + ")");
+        throw new MetaException("Unable to update transaction database "
+          + StringUtils.stringifyException(e));
+      } finally {
+        close(null, stmt, dbConn);
+        if(handle != null) {
+          handle.releaseLocks();
+        }
+        unlockInternal();
+      }
+    } catch (RetryException e) {
+      seedWriteIdOnAcidConversion(rqst);
+    }
+  }
 
   @Override
   @RetrySemantics.SafeToRetry
   public void performWriteSetGC() {

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
index 4695f0d..ef447e1 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
@@ -154,6 +154,12 @@
     throws NoSuchTxnException, TxnAbortedException, MetaException;
 
   /**
+   * Called on conversion of existing table to full acid. Sets initial write ID to a high
+   * enough value so that we can assign unique ROW__IDs to data in existing files.
+   */
+  void seedWriteIdOnAcidConversion(InitializeTableWriteIdsRequest rqst) throws MetaException;
+
+  /**
    * Obtain a lock.
    * @param rqst information on the lock to obtain. If the requester is part of a transaction
    *             the txn information must be included in the lock request.
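
For readers tracing the write-id arithmetic above: TransactionalValidationListener passes a seed of 10000000, and TxnHandler.seedWriteIdOnAcidConversion() stores getSeeWriteId() + 1 in NEXT_WRITE_ID.NWI_NEXT, so write ids 1 through 10,000,000 stay available for assigning ROW__IDs to data that was already in the table, and the first write id allocated after conversion should be 10,000,001. That is why the expected test output changes from writeid 1 and delta_0000001_0000001_0000 to writeid 10000001 and delta_10000001_10000001_0000. A minimal, self-contained Java sketch of that arithmetic follows; the class name, the main() harness, and the zero-padded directory format string are illustrative assumptions inferred from the directory names in the tests, not code from this patch.

public class WriteIdSeedSketch {
  // Seed used by TransactionalValidationListener above; the patch assumes no partition
  // holds more than 10M pre-conversion files that might need their own write id.
  private static final long SEED_WRITE_ID = 10_000_000L;

  public static void main(String[] args) {
    // TxnHandler inserts SEED_WRITE_ID + 1 as NWI_NEXT, i.e. the next id to hand out,
    // which leaves ids 1..10,000,000 reserved for data that pre-dates the conversion.
    long nwiNext = SEED_WRITE_ID + 1;

    // The first transactional write after conversion therefore gets write id 10,000,001.
    long firstWriteId = nwiNext;

    // Assumed delta-directory formatting, chosen only to reproduce the names that appear
    // in the updated unit-test and .q.out expectations.
    String deltaDir = String.format("delta_%07d_%07d_%04d", firstWriteId, firstWriteId, 0);

    System.out.println(firstWriteId); // 10000001
    System.out.println(deltaDir);     // delta_10000001_10000001_0000
  }
}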