http://git-wip-us.apache.org/repos/asf/hive/blob/34b0e07a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 408c089..0e0fca3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.BucketCodec; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; @@ -64,16 +65,11 @@ import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.validation.constraints.AssertTrue; - -/** - * TODO: this should be merged with TestTxnCommands once that is checked in - * specifically the tests; the supporting code here is just a clone of TestTxnCommands - */ public class TestTxnCommands2 { static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands2.class); protected static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + @@ -88,7 +84,7 @@ public class TestTxnCommands2 { protected HiveConf hiveConf; protected Driver d; - protected static enum Table { + protected enum Table { ACIDTBL("acidTbl"), ACIDTBLPART("acidTblPart", "p"), NONACIDORCTBL("nonAcidOrcTbl"), @@ -113,6 +109,8 @@ public class TestTxnCommands2 { this.partitionColumns = partitionColumns; } } + @Rule + public ExpectedException expectedException = ExpectedException.none(); @Before public void setUp() throws Exception { @@ -357,20 +355,22 @@ public class TestTxnCommands2 { for(String s : rs) { LOG.warn(s); } + Assert.assertEquals(536870912, BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(0))); + Assert.assertEquals(536936448, BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(1))); /* * All ROW__IDs are unique on read after conversion to acid * ROW__IDs are exactly the same before and after compaction - * Also check the file name after compaction for completeness + * Also check the file name (only) after compaction for completeness */ String[][] expected = { - {"{\"transactionid\":0,\"bucketid\":0,\"rowid\":0}\t0\t13", "bucket_00000"}, + {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t13", "bucket_00000"}, {"{\"transactionid\":18,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000"}, {"{\"transactionid\":20,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000"}, - {"{\"transactionid\":0,\"bucketid\":0,\"rowid\":1}\t0\t120", "bucket_00000"}, - {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":1}\t1\t2", "bucket_00001"}, - {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":3}\t1\t4", "bucket_00001"}, - {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":2}\t1\t5", "bucket_00001"}, - {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":4}\t1\t6", "bucket_00001"}, + {"{\"transactionid\":19,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000"}, + 
{"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "bucket_00001"}, + {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4", "bucket_00001"}, + {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "bucket_00001"}, + {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":4}\t1\t6", "bucket_00001"}, {"{\"transactionid\":18,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"} }; Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size()); @@ -392,9 +392,20 @@ public class TestTxnCommands2 { } //make sure they are the same before and after compaction } - /** - * Test the query correctness and directory layout after ACID table conversion and MAJOR compaction + * In current implementation of ACID, altering the value of transactional_properties or trying to + * set a value for previously unset value for an acid table will throw an exception. + * @throws Exception + */ + @Test + public void testFailureOnAlteringTransactionalProperties() throws Exception { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("TBLPROPERTIES with 'transactional_properties' cannot be altered after the table is created"); + runStatementOnDriver("create table acidTblLegacy (a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("alter table acidTblLegacy SET TBLPROPERTIES ('transactional_properties' = 'default')"); + } + /** + * Test the query correctness and directory layout for ACID table conversion * 1. Insert a row to Non-ACID table * 2. Convert Non-ACID to ACID table * 3. Insert a row to ACID table @@ -410,7 +421,7 @@ public class TestTxnCommands2 { // 1. Insert a row to Non-ACID table runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // There should be 2 original bucket files in the location (000000_0 and 000001_0) Assert.assertEquals(BUCKET_COUNT, status.length); for (int i = 0; i < status.length; i++) { @@ -426,7 +437,7 @@ public class TestTxnCommands2 { // 2. Convert NONACIDORCTBL to ACID table runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // Everything should be same as before Assert.assertEquals(BUCKET_COUNT, status.length); for (int i = 0; i < status.length; i++) { @@ -442,24 +453,23 @@ public class TestTxnCommands2 { // 3. Insert another row to newly-converted ACID table runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory. 
- // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001) + // The delta directory should also have only 1 bucket file (bucket_00001) Assert.assertEquals(3, status.length); boolean sawNewDelta = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("delta_.*")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + Assert.assertEquals(1, buckets.length); // only one bucket file + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); } else { Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); } } Assert.assertTrue(sawNewDelta); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); + rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b"); resultData = new int[][] {{1, 2}, {3, 4}}; Assert.assertEquals(stringifyValues(resultData), rs); rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); @@ -472,16 +482,15 @@ public class TestTxnCommands2 { // There should be 1 new directory: base_xxxxxxx. // Original bucket files and delta directory should stay until Cleaner kicks in. status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(4, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { sawNewBase = true; FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + Assert.assertEquals(1, buckets.length); + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); } } Assert.assertTrue(sawNewBase); @@ -495,13 +504,13 @@ public class TestTxnCommands2 { // 5. Let Cleaner delete obsolete files/dirs // Note, here we create a fake directory along with fake files as original directories/files String fakeFile0 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() + - "/subdir/000000_0"; + "/subdir/000000_0"; String fakeFile1 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() + - "/subdir/000000_1"; + "/subdir/000000_1"; fs.create(new Path(fakeFile0)); fs.create(new Path(fakeFile1)); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // Before Cleaner, there should be 5 items: // 2 original files, 1 original directory, 1 base directory and 1 delta directory Assert.assertEquals(5, status.length); @@ -509,13 +518,12 @@ public class TestTxnCommands2 { // There should be only 1 directory left: base_xxxxxxx. // Original bucket files and delta directory should have been cleaned up. 
status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(1, status.length); Assert.assertTrue(status[0].getPath().getName().matches("base_.*")); FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + Assert.assertEquals(1, buckets.length); + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 2}, {3, 4}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -525,7 +533,7 @@ public class TestTxnCommands2 { } /** - * Test the query correctness and directory layout after ACID table conversion and MAJOR compaction + * Test the query correctness and directory layout for ACID table conversion * 1. Insert a row to Non-ACID table * 2. Convert Non-ACID to ACID table * 3. Update the existing row in ACID table @@ -541,7 +549,7 @@ public class TestTxnCommands2 { // 1. Insert a row to Non-ACID table runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // There should be 2 original bucket files in the location (000000_0 and 000001_0) Assert.assertEquals(BUCKET_COUNT, status.length); for (int i = 0; i < status.length; i++) { @@ -550,14 +558,14 @@ public class TestTxnCommands2 { List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); int [][] resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *? + rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); int resultCount = 1; Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); // 2. Convert NONACIDORCTBL to ACID table runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // Everything should be same as before Assert.assertEquals(BUCKET_COUNT, status.length); for (int i = 0; i < status.length; i++) { @@ -566,29 +574,39 @@ public class TestTxnCommands2 { rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 2}}; Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *? + rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); resultCount = 1; Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); // 3. 
Update the existing row in newly-converted ACID table runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory. + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + // There should be 2 original bucket files (000000_0 and 000001_0), plus one delta directory + // and one delete_delta directory. When split-update is enabled, an update event is split into + // a combination of delete and insert, that generates the delete_delta directory. // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001) - Assert.assertEquals(3, status.length); + // and so should the delete_delta directory. + Assert.assertEquals(4, status.length); boolean sawNewDelta = false; + boolean sawNewDeleteDelta = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("delta_.*")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + } else if (status[i].getPath().getName().matches("delete_delta_.*")) { + sawNewDeleteDelta = true; + FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); + Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); } else { Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); } } Assert.assertTrue(sawNewDelta); + Assert.assertTrue(sawNewDeleteDelta); rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 3}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -602,8 +620,8 @@ public class TestTxnCommands2 { // There should be 1 new directory: base_0000001. // Original bucket files and delta directory should stay until Cleaner kicks in. status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(4, status.length); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + Assert.assertEquals(5, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { @@ -623,15 +641,15 @@ public class TestTxnCommands2 { // 5. Let Cleaner delete obsolete files/dirs status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Before Cleaner, there should be 4 items: - // 2 original files, 1 delta directory and 1 base directory - Assert.assertEquals(4, status.length); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + // Before Cleaner, there should be 5 items: + // 2 original files, 1 delta directory, 1 delete_delta directory and 1 base directory + Assert.assertEquals(5, status.length); runCleaner(hiveConf); // There should be only 1 directory left: base_0000001. - // Original bucket files and delta directory should have been cleaned up. + // Original bucket files, delta directory and delete_delta should have been cleaned up. 
status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(1, status.length); Assert.assertTrue(status[0].getPath().getName().matches("base_.*")); FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); @@ -646,7 +664,7 @@ public class TestTxnCommands2 { } /** - * Test the query correctness and directory layout after ACID table conversion and MAJOR compaction + * Test the query correctness and directory layout for ACID table conversion * 1. Insert a row to Non-ACID table * 2. Convert Non-ACID to ACID table * 3. Perform Major compaction @@ -663,7 +681,7 @@ public class TestTxnCommands2 { // 1. Insert a row to Non-ACID table runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // There should be 2 original bucket files in the location (000000_0 and 000001_0) Assert.assertEquals(BUCKET_COUNT, status.length); for (int i = 0; i < status.length; i++) { @@ -676,10 +694,10 @@ public class TestTxnCommands2 { int resultCount = 1; Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - // 2. Convert NONACIDORCTBL to ACID table + // 2. Convert NONACIDORCTBL to ACID table with split_update enabled. (txn_props=default) runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // Everything should be same as before Assert.assertEquals(BUCKET_COUNT, status.length); for (int i = 0; i < status.length; i++) { @@ -698,7 +716,7 @@ public class TestTxnCommands2 { // There should be 1 new directory: base_-9223372036854775808 // Original bucket files should stay until Cleaner kicks in. status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(3, status.length); boolean sawNewBase = false; for (int i = 0; i < status.length; i++) { @@ -722,12 +740,14 @@ public class TestTxnCommands2 { runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1"); runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)"); status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Arrays.sort(status); // make sure delta_0000001_0000001_0000 appears before delta_0000002_0000002_0000 // There should be 2 original bucket files (000000_0 and 000001_0), a base directory, - // plus two new delta directories - Assert.assertEquals(5, status.length); + // plus two new delta directories and one delete_delta directory that would be created due to + // the update statement (remember split-update U=D+I)! 
+ Assert.assertEquals(6, status.length); int numDelta = 0; + int numDeleteDelta = 0; sawNewBase = false; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("delta_.*")) { @@ -740,9 +760,17 @@ public class TestTxnCommands2 { Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } else if (numDelta == 2) { Assert.assertEquals("delta_0000023_0000023_0000", status[i].getPath().getName()); - Assert.assertEquals(BUCKET_COUNT, buckets.length); - Assert.assertEquals("bucket_00000", buckets[0].getPath().getName()); - Assert.assertEquals("bucket_00001", buckets[1].getPath().getName()); + Assert.assertEquals(1, buckets.length); + Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); + } + } else if (status[i].getPath().getName().matches("delete_delta_.*")) { + numDeleteDelta++; + FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); + Arrays.sort(buckets); + if (numDeleteDelta == 1) { + Assert.assertEquals("delete_delta_0000022_0000022_0000", status[i].getPath().getName()); + Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); + Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } } else if (status[i].getPath().getName().matches("base_.*")) { Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName()); @@ -755,11 +783,13 @@ public class TestTxnCommands2 { } } Assert.assertEquals(2, numDelta); + Assert.assertEquals(1, numDeleteDelta); Assert.assertTrue(sawNewBase); + rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 3}, {3, 4}}; Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *? + rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); resultCount = 2; Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); @@ -767,11 +797,12 @@ public class TestTxnCommands2 { runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); runWorker(hiveConf); // There should be 1 new base directory: base_0000001 - // Original bucket files, delta directories and the previous base directory should stay until Cleaner kicks in. + // Original bucket files, delta directories, delete_delta directories and the + // previous base directory should stay until Cleaner kicks in. 
status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Arrays.sort(status); - Assert.assertEquals(6, status.length); + Assert.assertEquals(7, status.length); int numBase = 0; for (int i = 0; i < status.length; i++) { if (status[i].getPath().getName().matches("base_.*")) { @@ -785,9 +816,8 @@ public class TestTxnCommands2 { } else if (numBase == 2) { // The new base dir now has two bucket files, since the delta dir has two bucket files Assert.assertEquals("base_0000023", status[i].getPath().getName()); - Assert.assertEquals(BUCKET_COUNT, buckets.length); - Assert.assertEquals("bucket_00000", buckets[0].getPath().getName()); - Assert.assertEquals("bucket_00001", buckets[1].getPath().getName()); + Assert.assertEquals(1, buckets.length); + Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } } } @@ -795,28 +825,27 @@ public class TestTxnCommands2 { rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 3}, {3, 4}}; Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *? + rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); resultCount = 2; Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); // 6. Let Cleaner delete obsolete files/dirs status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); // Before Cleaner, there should be 6 items: - // 2 original files, 2 delta directories and 2 base directories - Assert.assertEquals(6, status.length); + // 2 original files, 2 delta directories, 1 delete_delta directory and 2 base directories + Assert.assertEquals(7, status.length); runCleaner(hiveConf); // There should be only 1 directory left: base_0000001. // Original bucket files, delta directories and previous base directory should have been cleaned up. status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(1, status.length); Assert.assertEquals("base_0000023", status[0].getPath().getName()); FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); Arrays.sort(buckets); - Assert.assertEquals(BUCKET_COUNT, buckets.length); - Assert.assertEquals("bucket_00000", buckets[0].getPath().getName()); - Assert.assertEquals("bucket_00001", buckets[1].getPath().getName()); + Assert.assertEquals(1, buckets.length); + Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); resultData = new int[][] {{1, 3}, {3, 4}}; Assert.assertEquals(stringifyValues(resultData), rs); @@ -824,9 +853,6 @@ public class TestTxnCommands2 { resultCount = 2; Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); } - - - @Test public void testValidTxnsBookkeeping() throws Exception { // 1. Run a query against a non-ACID table, and we shouldn't have txn logged in conf
http://git-wip-us.apache.org/repos/asf/hive/blob/34b0e07a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java deleted file mode 100644 index 520e958..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java +++ /dev/null @@ -1,545 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql; - -import java.util.Arrays; -import java.util.List; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -/** - * Same as TestTxnCommands2 but tests ACID tables with 'transactional_properties' set to 'default'. - * This tests whether ACID tables with split-update turned on are working correctly or not - * for the same set of tests when it is turned off. Of course, it also adds a few tests to test - * specific behaviors of ACID tables with split-update turned on. - */ -public class TestTxnCommands2WithSplitUpdate extends TestTxnCommands2 { - - public TestTxnCommands2WithSplitUpdate() { - super(); - } - - @Rule - public ExpectedException expectedException = ExpectedException.none(); - - @Override - @Before - public void setUp() throws Exception { - setUpWithTableProperties("'transactional'='true','transactional_properties'='default'"); - } - - @Override - @Test - public void testInitiatorWithMultipleFailedCompactions() throws Exception { - // Test with split-update turned on. - testInitiatorWithMultipleFailedCompactionsForVariousTblProperties("'transactional'='true','transactional_properties'='default'"); - } - - @Override - @Test - public void writeBetweenWorkerAndCleaner() throws Exception { - writeBetweenWorkerAndCleanerForVariousTblProperties("'transactional'='true','transactional_properties'='default'"); - } - - @Override - @Test - public void testACIDwithSchemaEvolutionAndCompaction() throws Exception { - testACIDwithSchemaEvolutionForVariousTblProperties("'transactional'='true','transactional_properties'='default'"); - } - - /** - * In current implementation of ACID, altering the value of transactional_properties or trying to - * set a value for previously unset value for an acid table will throw an exception. 
- * @throws Exception - */ - @Test - public void testFailureOnAlteringTransactionalProperties() throws Exception { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage("TBLPROPERTIES with 'transactional_properties' cannot be altered after the table is created"); - runStatementOnDriver("create table acidTblLegacy (a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("alter table acidTblLegacy SET TBLPROPERTIES ('transactional_properties' = 'default')"); - } - /** - * Test the query correctness and directory layout for ACID table conversion with split-update - * enabled. - * 1. Insert a row to Non-ACID table - * 2. Convert Non-ACID to ACID table with split-update enabled - * 3. Insert a row to ACID table - * 4. Perform Major compaction - * 5. Clean - * @throws Exception - */ - @Test - @Override - public void testNonAcidToAcidConversion1() throws Exception { - FileSystem fs = FileSystem.get(hiveConf); - FileStatus[] status; - - // 1. Insert a row to Non-ACID table - runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - int [][] resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - int resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 2. Convert NONACIDORCTBL to ACID table - runStatementOnDriver("alter table " + Table.NONACIDORCTBL - + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 3. Insert another row to newly-converted ACID table - runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory. 
- // The delta directory should also have only 1 bucket file (bucket_00001) - Assert.assertEquals(3, status.length); - boolean sawNewDelta = false; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("delta_.*")) { - sawNewDelta = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(1, buckets.length); // only one bucket file - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); - } else { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - } - Assert.assertTrue(sawNewDelta); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b"); - resultData = new int[][] {{1, 2}, {3, 4}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 2; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 4. Perform a major compaction - runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); - runWorker(hiveConf); - // There should be 1 new directory: base_xxxxxxx. - // Original bucket files and delta directory should stay until Cleaner kicks in. - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(4, status.length); - boolean sawNewBase = false; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("base_.*")) { - sawNewBase = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(1, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); - } - } - Assert.assertTrue(sawNewBase); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 2}, {3, 4}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 2; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 5. Let Cleaner delete obsolete files/dirs - // Note, here we create a fake directory along with fake files as original directories/files - String fakeFile0 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() + - "/subdir/000000_0"; - String fakeFile1 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() + - "/subdir/000000_1"; - fs.create(new Path(fakeFile0)); - fs.create(new Path(fakeFile1)); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Before Cleaner, there should be 5 items: - // 2 original files, 1 original directory, 1 base directory and 1 delta directory - Assert.assertEquals(5, status.length); - runCleaner(hiveConf); - // There should be only 1 directory left: base_xxxxxxx. - // Original bucket files and delta directory should have been cleaned up. 
- status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(1, status.length); - Assert.assertTrue(status[0].getPath().getName().matches("base_.*")); - FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(1, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 2}, {3, 4}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 2; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - } - - /** - * Test the query correctness and directory layout for ACID table conversion with split-update - * enabled. - * 1. Insert a row to Non-ACID table - * 2. Convert Non-ACID to ACID table with split update enabled. - * 3. Update the existing row in ACID table - * 4. Perform Major compaction - * 5. Clean - * @throws Exception - */ - @Test - @Override - public void testNonAcidToAcidConversion2() throws Exception { - FileSystem fs = FileSystem.get(hiveConf); - FileStatus[] status; - - // 1. Insert a row to Non-ACID table - runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - int [][] resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - int resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 2. Convert NONACIDORCTBL to ACID table - runStatementOnDriver("alter table " + Table.NONACIDORCTBL - + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 3. Update the existing row in newly-converted ACID table - runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // There should be 2 original bucket files (000000_0 and 000001_0), plus one delta directory - // and one delete_delta directory. 
When split-update is enabled, an update event is split into - // a combination of delete and insert, that generates the delete_delta directory. - // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001) - // and so should the delete_delta directory. - Assert.assertEquals(4, status.length); - boolean sawNewDelta = false; - boolean sawNewDeleteDelta = false; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("delta_.*")) { - sawNewDelta = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - } else if (status[i].getPath().getName().matches("delete_delta_.*")) { - sawNewDeleteDelta = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - } else { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - } - Assert.assertTrue(sawNewDelta); - Assert.assertTrue(sawNewDeleteDelta); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 3}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 4. Perform a major compaction - runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); - runWorker(hiveConf); - // There should be 1 new directory: base_0000001. - // Original bucket files and delta directory should stay until Cleaner kicks in. - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(5, status.length); - boolean sawNewBase = false; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("base_.*")) { - sawNewBase = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); - } - } - Assert.assertTrue(sawNewBase); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 3}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 5. Let Cleaner delete obsolete files/dirs - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Before Cleaner, there should be 5 items: - // 2 original files, 1 delta directory, 1 delete_delta directory and 1 base directory - Assert.assertEquals(5, status.length); - runCleaner(hiveConf); - // There should be only 1 directory left: base_0000001. - // Original bucket files, delta directory and delete_delta should have been cleaned up. 
- status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(1, status.length); - Assert.assertTrue(status[0].getPath().getName().matches("base_.*")); - FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001")); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 3}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - } - - /** - * Test the query correctness and directory layout for ACID table conversion with split-update - * enabled. - * 1. Insert a row to Non-ACID table - * 2. Convert Non-ACID to ACID table with split-update enabled - * 3. Perform Major compaction - * 4. Insert a new row to ACID table - * 5. Perform another Major compaction - * 6. Clean - * @throws Exception - */ - @Test - @Override - public void testNonAcidToAcidConversion3() throws Exception { - FileSystem fs = FileSystem.get(hiveConf); - FileStatus[] status; - - // 1. Insert a row to Non-ACID table - runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // There should be 2 original bucket files in the location (000000_0 and 000001_0) - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - List<String> rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - int [][] resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - int resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 2. Convert NONACIDORCTBL to ACID table with split_update enabled. (txn_props=default) - runStatementOnDriver("alter table " + Table.NONACIDORCTBL - + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Everything should be same as before - Assert.assertEquals(BUCKET_COUNT, status.length); - for (int i = 0; i < status.length; i++) { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 3. Perform a major compaction - runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); - runWorker(hiveConf); - // There should be 1 new directory: base_-9223372036854775808 - // Original bucket files should stay until Cleaner kicks in. 
- status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(3, status.length); - boolean sawNewBase = false; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("base_.*")) { - Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName()); - sawNewBase = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } - } - Assert.assertTrue(sawNewBase); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 2}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 1; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 4. Update the existing row, and insert another row to newly-converted ACID table - runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1"); - runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)"); - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Arrays.sort(status); // make sure delta_0000001_0000001_0000 appears before delta_0000002_0000002_0000 - // There should be 2 original bucket files (000000_0 and 000001_0), a base directory, - // plus two new delta directories and one delete_delta directory that would be created due to - // the update statement (remember split-update U=D+I)! - Assert.assertEquals(6, status.length); - int numDelta = 0; - int numDeleteDelta = 0; - sawNewBase = false; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("delta_.*")) { - numDelta++; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Arrays.sort(buckets); - if (numDelta == 1) { - Assert.assertEquals("delta_0000022_0000022_0000", status[i].getPath().getName()); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } else if (numDelta == 2) { - Assert.assertEquals("delta_0000023_0000023_0000", status[i].getPath().getName()); - Assert.assertEquals(1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } - } else if (status[i].getPath().getName().matches("delete_delta_.*")) { - numDeleteDelta++; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Arrays.sort(buckets); - if (numDeleteDelta == 1) { - Assert.assertEquals("delete_delta_0000022_0000022_0000", status[i].getPath().getName()); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } - } else if (status[i].getPath().getName().matches("base_.*")) { - Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName()); - sawNewBase = true; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } else { - Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0")); - } - } - Assert.assertEquals(2, 
numDelta); - Assert.assertEquals(1, numDeleteDelta); - Assert.assertTrue(sawNewBase); - - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 3}, {3, 4}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 2; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 5. Perform another major compaction - runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'"); - runWorker(hiveConf); - // There should be 1 new base directory: base_0000001 - // Original bucket files, delta directories, delete_delta directories and the - // previous base directory should stay until Cleaner kicks in. - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Arrays.sort(status); - Assert.assertEquals(7, status.length); - int numBase = 0; - for (int i = 0; i < status.length; i++) { - if (status[i].getPath().getName().matches("base_.*")) { - numBase++; - FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Arrays.sort(buckets); - if (numBase == 1) { - Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName()); - Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } else if (numBase == 2) { - // The new base dir now has two bucket files, since the delta dir has two bucket files - Assert.assertEquals("base_0000023", status[i].getPath().getName()); - Assert.assertEquals(1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - } - } - } - Assert.assertEquals(2, numBase); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 3}, {3, 4}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 2; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - - // 6. Let Cleaner delete obsolete files/dirs - status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - // Before Cleaner, there should be 6 items: - // 2 original files, 2 delta directories, 1 delete_delta directory and 2 base directories - Assert.assertEquals(7, status.length); - runCleaner(hiveConf); - // There should be only 1 directory left: base_0000001. - // Original bucket files, delta directories and previous base directory should have been cleaned up. 
- status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + - (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); - Assert.assertEquals(1, status.length); - Assert.assertEquals("base_0000023", status[0].getPath().getName()); - FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); - Arrays.sort(buckets); - Assert.assertEquals(1, buckets.length); - Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); - rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); - resultData = new int[][] {{1, 3}, {3, 4}}; - Assert.assertEquals(stringifyValues(resultData), rs); - rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL); - resultCount = 2; - Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0))); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/34b0e07a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java index 44a9412..c76d654 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java @@ -23,11 +23,11 @@ import org.junit.Before; import org.junit.Test; /** - * Same as TestTxnCommands2WithSplitUpdate but tests ACID tables with vectorization turned on by + * Same as TestTxnCommands2 but tests ACID tables with vectorization turned on by * default, and having 'transactional_properties' set to 'default'. This specifically tests the * fast VectorizedOrcAcidRowBatchReader for ACID tables with split-update turned on. 
*/ -public class TestTxnCommands2WithSplitUpdateAndVectorization extends TestTxnCommands2WithSplitUpdate { +public class TestTxnCommands2WithSplitUpdateAndVectorization extends TestTxnCommands2 { public TestTxnCommands2WithSplitUpdateAndVectorization() { super(); http://git-wip-us.apache.org/repos/asf/hive/blob/34b0e07a/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java index 44ff65c..06e4f98 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat; import org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFile; import org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockFileSystem; import org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.MockPath; +import org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId; import org.junit.Assert; import org.junit.Test; @@ -669,21 +670,12 @@ public class TestAcidUtils { @Test public void testAcidOperationalProperties() throws Exception { - AcidUtils.AcidOperationalProperties testObj = AcidUtils.AcidOperationalProperties.getLegacy(); - assertsForAcidOperationalProperties(testObj, "legacy"); - - testObj = AcidUtils.AcidOperationalProperties.getDefault(); + AcidUtils.AcidOperationalProperties testObj = AcidUtils.AcidOperationalProperties.getDefault(); assertsForAcidOperationalProperties(testObj, "default"); - testObj = AcidUtils.AcidOperationalProperties.parseInt(0); - assertsForAcidOperationalProperties(testObj, "legacy"); - testObj = AcidUtils.AcidOperationalProperties.parseInt(1); assertsForAcidOperationalProperties(testObj, "split_update"); - testObj = AcidUtils.AcidOperationalProperties.parseString("legacy"); - assertsForAcidOperationalProperties(testObj, "legacy"); - testObj = AcidUtils.AcidOperationalProperties.parseString("default"); assertsForAcidOperationalProperties(testObj, "default"); @@ -699,12 +691,6 @@ public class TestAcidUtils { assertEquals(1, testObj.toInt()); assertEquals("|split_update", testObj.toString()); break; - case "legacy": - assertEquals(false, testObj.isSplitUpdate()); - assertEquals(false, testObj.isHashBasedMerge()); - assertEquals(0, testObj.toInt()); - assertEquals("", testObj.toString()); - break; default: break; } @@ -716,7 +702,7 @@ public class TestAcidUtils { Configuration testConf = new Configuration(); // Test setter for configuration object. AcidUtils.setAcidOperationalProperties(testConf, oprProps); - assertEquals(1, testConf.getInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, 0)); + assertEquals(1, testConf.getInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, -1)); // Test getter for configuration object. assertEquals(oprProps.toString(), AcidUtils.getAcidOperationalProperties(testConf).toString()); @@ -726,12 +712,15 @@ public class TestAcidUtils { assertEquals(oprProps.toString(), parameters.get(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname)); // Test getter for map object. - // Calling a get on the 'parameters' will still return legacy type because the setters/getters - // for map work on different string keys. 
The setter will set the HIVE_TXN_OPERATIONAL_PROPERTIES - // while the getter will try to read the key TABLE_TRANSACTIONAL_PROPERTIES. - assertEquals(0, AcidUtils.getAcidOperationalProperties(parameters).toInt()); + assertEquals(1, AcidUtils.getAcidOperationalProperties(parameters).toInt()); parameters.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, oprProps.toString()); // Set the appropriate key in the map and test that we are able to read it back correctly. assertEquals(1, AcidUtils.getAcidOperationalProperties(parameters).toInt()); } + + /** + * See {@link TestOrcRawRecordMerger#testGetLogicalLength()} + */ + public void testGetLogicalLength() throws Exception { + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/34b0e07a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index b004cf5..53bd08c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -75,12 +75,14 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.RecordIdentifier; +import org.apache.hadoop.hive.ql.io.RecordUpdater; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.Context; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy; import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; @@ -597,12 +599,13 @@ public class TestInputOutputFormat { @Test public void testACIDSplitStrategy() throws Exception { conf.set("bucket_count", "2"); + conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); MockFileSystem fs = new MockFileSystem(conf, - new MockFile("mock:/a/delta_000_001/part-00", 1000, new byte[1], new MockBlock("host1")), - new MockFile("mock:/a/delta_000_001/part-01", 1000, new byte[1], new MockBlock("host1")), - new MockFile("mock:/a/delta_001_002/part-02", 1000, new byte[1], new MockBlock("host1")), - new MockFile("mock:/a/delta_001_002/part-03", 1000, new byte[1], new MockBlock("host1"))); + new MockFile("mock:/a/delta_000_001/bucket_000000", 1000, new byte[1], new MockBlock("host1")), + new MockFile("mock:/a/delta_000_001/bucket_000001", 1000, new byte[1], new MockBlock("host1")), + new MockFile("mock:/a/delta_001_002/bucket_000000", 1000, new byte[1], new MockBlock("host1")), + new MockFile("mock:/a/delta_001_002/bucket_000001", 1000, new byte[1], new MockBlock("host1"))); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); @@ -611,9 +614,9 @@ public class TestInputOutputFormat { List<OrcSplit> splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits(); ColumnarSplitSizeEstimator 
splitSizeEstimator = new ColumnarSplitSizeEstimator(); for (OrcSplit split: splits) { - assertEquals(Integer.MAX_VALUE, splitSizeEstimator.getEstimatedSize(split)); + assertEquals(1, splitSizeEstimator.getEstimatedSize(split)); } - assertEquals(2, splits.size()); + assertEquals(4, splits.size()); } @Test @@ -1105,6 +1108,9 @@ public class TestInputOutputFormat { } } + /** + * WARNING: delete(Path...) doesn't actually delete + */ public static class MockFileSystem extends FileSystem { final List<MockFile> files = new ArrayList<MockFile>(); final Map<MockFile, FileStatus> fileStatusMap = new HashMap<>(); @@ -1230,14 +1236,32 @@ public class TestInputOutputFormat { public boolean delete(Path path) throws IOException { statistics.incrementWriteOps(1); checkAccess(); - return false; + int removed = 0; + for(int i = 0; i < files.size(); i++) { + MockFile mf = files.get(i); + if(path.equals(mf.path)) { + files.remove(i); + removed++; + break; + } + } + for(int i = 0; i < globalFiles.size(); i++) { + MockFile mf = globalFiles.get(i); + if(path.equals(mf.path)) { + globalFiles.remove(i); + removed++; + break; + } + } + return removed > 0; } @Override public boolean delete(Path path, boolean b) throws IOException { - statistics.incrementWriteOps(1); - checkAccess(); - return false; + if(b) { + throw new UnsupportedOperationException(); + } + return delete(path); } @Override @@ -2690,9 +2714,11 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktable - // call-2: open - mock:/mocktable/0_0 - // call-3: open - mock:/mocktable/0_1 - assertEquals(3, readOpsDelta); + // call-2: check existence of side file for mock:/mocktable/0_0 + // call-3: open - mock:/mocktable/0_0 + // call-4: check existence of side file for mock:/mocktable/0_1 + // call-5: open - mock:/mocktable/0_1 + assertEquals(5, readOpsDelta); assertEquals(2, splits.length); // revert back to local fs @@ -2748,9 +2774,11 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktbl - // call-2: open - mock:/mocktbl/0_0 - // call-3: open - mock:/mocktbl/0_1 - assertEquals(3, readOpsDelta); + // call-2: check existence of side file for mock:/mocktbl/0_0 + // call-3: open - mock:/mocktbl/0_0 + // call-4: check existence of side file for mock:/mocktbl/0_1 + // call-5: open - mock:/mocktbl/0_1 + assertEquals(5, readOpsDelta); // force BI to avoid reading footers conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI"); @@ -2768,7 +2796,9 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktbl - assertEquals(1, readOpsDelta); + // call-2: check existence of side file for mock:/mocktbl/0_0 + // call-3: check existence of side file for mock:/mocktbl/0_1 + assertEquals(3, readOpsDelta); // enable cache and use default strategy conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "10Mb"); @@ -2787,9 +2817,11 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktbl - // call-2: open - mock:/mocktbl/0_0 - // call-3: open - mock:/mocktbl/0_1 - assertEquals(3, readOpsDelta); + // call-2: check existence of side file for mock:/mocktbl/0_0 + // call-3: open - mock:/mocktbl/0_0 + // call-4: check existence of side file for mock:/mocktbl/0_1 + // call-5: open - mock:/mocktbl/0_1 + assertEquals(5, readOpsDelta); for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { if (statistics.getScheme().equalsIgnoreCase("mock")) { @@ -2859,9 +2891,11 @@ public class TestInputOutputFormat { } } // call-1:
listLocatedStatus - mock:/mocktable - // call-2: open - mock:/mocktbl1/0_0 - // call-3: open - mock:/mocktbl1/0_1 - assertEquals(3, readOpsDelta); + // call-2: check side file for mock:/mocktbl1/0_0 + // call-3: open - mock:/mocktbl1/0_0 + // call-4: check side file for mock:/mocktbl1/0_1 + // call-5: open - mock:/mocktbl1/0_1 + assertEquals(5, readOpsDelta); // change file length and look for cache misses @@ -2898,9 +2932,11 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktable - // call-2: open - mock:/mocktbl1/0_0 - // call-3: open - mock:/mocktbl1/0_1 - assertEquals(3, readOpsDelta); + // call-2: check side file for mock:/mocktbl1/0_0 + // call-3: open - mock:/mocktbl1/0_0 + // call-4: check side file for mock:/mocktbl1/0_1 + // call-5: open - mock:/mocktbl1/0_1 + assertEquals(5, readOpsDelta); for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { if (statistics.getScheme().equalsIgnoreCase("mock")) { @@ -2971,9 +3007,11 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktbl2 - // call-2: open - mock:/mocktbl2/0_0 - // call-3: open - mock:/mocktbl2/0_1 - assertEquals(3, readOpsDelta); + // call-2: check side file for mock:/mocktbl2/0_0 + // call-3: open - mock:/mocktbl2/0_0 + // call-4: check side file for mock:/mocktbl2/0_1 + // call-5: open - mock:/mocktbl2/0_1 + assertEquals(5, readOpsDelta); // change file modification time and look for cache misses FileSystem fs1 = FileSystem.get(conf); @@ -2993,8 +3031,9 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktbl2 - // call-2: open - mock:/mocktbl2/0_1 - assertEquals(2, readOpsDelta); + // call-2: check side file for mock:/mocktbl2/0_1 + // call-3: open - mock:/mocktbl2/0_1 + assertEquals(3, readOpsDelta); // touch the next file fs1 = FileSystem.get(conf); @@ -3014,8 +3053,9 @@ public class TestInputOutputFormat { } } // call-1: listLocatedStatus - mock:/mocktbl2 - // call-2: open - mock:/mocktbl2/0_0 - assertEquals(2, readOpsDelta); + // call-2: check side file for mock:/mocktbl2/0_0 + // call-3: open - mock:/mocktbl2/0_0 + assertEquals(3, readOpsDelta); for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { if (statistics.getScheme().equalsIgnoreCase("mock")) { @@ -3332,6 +3372,7 @@ public class TestInputOutputFormat { MockFileSystem fs = new MockFileSystem(conf); MockPath mockPath = new MockPath(fs, "mock:///mocktable5"); conf.set("hive.transactional.table.scan", "true"); + conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); conf.set("hive.orc.splits.include.file.footer", "false"); @@ -3409,6 +3450,7 @@ public class TestInputOutputFormat { MockFileSystem fs = new MockFileSystem(conf); MockPath mockPath = new MockPath(fs, "mock:///mocktable6"); conf.set("hive.transactional.table.scan", "true"); + conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); conf.set("hive.orc.splits.include.file.footer", "true"); @@ -3481,15 +3523,14 @@ public class TestInputOutputFormat { @Test public void testACIDReaderNoFooterSerializeWithDeltas() throws Exception { - MockFileSystem fs = new MockFileSystem(conf); + conf.set("fs.defaultFS", "mock:///"); + 
conf.set("fs.mock.impl", MockFileSystem.class.getName()); + FileSystem fs = FileSystem.get(conf); MockPath mockPath = new MockPath(fs, "mock:///mocktable7"); - conf.set("hive.transactional.table.scan", "true"); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); conf.set("hive.orc.splits.include.file.footer", "false"); conf.set("mapred.input.dir", mockPath.toString()); - conf.set("fs.defaultFS", "mock:///"); - conf.set("fs.mock.impl", MockFileSystem.class.getName()); StructObjectInspector inspector; synchronized (TestOrcFile.class) { inspector = (StructObjectInspector) @@ -3505,17 +3546,22 @@ public class TestInputOutputFormat { } writer.close(); - writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"), - OrcFile.writerOptions(conf).blockPadding(false) - .bufferSize(1024).inspector(inspector)); + AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(1).minimumTransactionId(1) + .maximumTransactionId(1).inspector(inspector).finalDestination(mockPath); + OrcOutputFormat of = new OrcOutputFormat(); + RecordUpdater ru = of.getRecordUpdater(mockPath, options); for (int i = 0; i < 10; ++i) { - writer.addRow(new MyRow(i, 2 * i)); + ru.insert(options.getMinimumTransactionId(), new MyRow(i, 2 * i)); } - writer.close(); + ru.close(false);//this deletes the side file + + //set up props for read + conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); + AcidUtils.setTransactionalTableScan(conf, true); OrcInputFormat orcInputFormat = new OrcInputFormat(); InputSplit[] splits = orcInputFormat.getSplits(conf, 2); - assertEquals(1, splits.length); + assertEquals(2, splits.length); int readOpsBefore = -1; for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { if (statistics.getScheme().equalsIgnoreCase("mock")) { @@ -3530,14 +3576,8 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - // NOTE: don't be surprised if deltas value is different - // in older release deltas=2 as min and max transaction are added separately to delta list. 
- // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); - if (split instanceof OrcSplit) { - assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + - " orc splits.", ((OrcSplit) split).hasFooter()); - } + assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); orcInputFormat.getRecordReader(split, conf, Reporter.NULL); } @@ -3547,11 +3587,9 @@ public class TestInputOutputFormat { readOpsDelta = statistics.getReadOps() - readOpsBefore; } } - // call-1: open to read footer - split 1 => mock:/mocktable7/0_0 - // call-2: open to read data - split 1 => mock:/mocktable7/0_0 - // call-3: open side file (flush length) of delta directory - // call-4: fs.exists() check for delta_xxx_xxx/bucket_00000 file - // call-5: AcidUtils.getAcidState - getLen() mock:/mocktable7/0_0 + // call-1: open to read data - split 1 => mock:/mocktable8/0_0 + // call-2: split 2 - find hive.acid.key.index in footer of delta_x_y/bucket_00001 + // call-3: split 2 - read delta_x_y/bucket_00001 assertEquals(5, readOpsDelta); // revert back to local fs @@ -3560,15 +3598,14 @@ public class TestInputOutputFormat { @Test public void testACIDReaderFooterSerializeWithDeltas() throws Exception { - MockFileSystem fs = new MockFileSystem(conf); + conf.set("fs.defaultFS", "mock:///"); + conf.set("fs.mock.impl", MockFileSystem.class.getName()); + FileSystem fs = FileSystem.get(conf);//ensures that FS object is cached so that everyone uses the same instance MockPath mockPath = new MockPath(fs, "mock:///mocktable8"); - conf.set("hive.transactional.table.scan", "true"); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty()); conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty()); conf.set("hive.orc.splits.include.file.footer", "true"); conf.set("mapred.input.dir", mockPath.toString()); - conf.set("fs.defaultFS", "mock:///"); - conf.set("fs.mock.impl", MockFileSystem.class.getName()); StructObjectInspector inspector; synchronized (TestOrcFile.class) { inspector = (StructObjectInspector) @@ -3584,17 +3621,22 @@ public class TestInputOutputFormat { } writer.close(); - writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"), - OrcFile.writerOptions(conf).blockPadding(false) - .bufferSize(1024).inspector(inspector)); + AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(1).minimumTransactionId(1) + .maximumTransactionId(1).inspector(inspector).finalDestination(mockPath); + OrcOutputFormat of = new OrcOutputFormat(); + RecordUpdater ru = of.getRecordUpdater(mockPath, options); for (int i = 0; i < 10; ++i) { - writer.addRow(new MyRow(i, 2 * i)); + ru.insert(options.getMinimumTransactionId(), new MyRow(i, 2 * i)); } - writer.close(); + ru.close(false);//this deletes the side file + + //set up props for read + conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); + AcidUtils.setTransactionalTableScan(conf, true); OrcInputFormat orcInputFormat = new OrcInputFormat(); InputSplit[] splits = orcInputFormat.getSplits(conf, 2); - assertEquals(1, splits.length); + assertEquals(2, splits.length); int readOpsBefore = -1; for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) { if (statistics.getScheme().equalsIgnoreCase("mock")) { @@ -3609,14 +3651,8 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); 
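Both converted tests now write their delta data through the ACID write path instead of a bare ORC writer. A condensed, self-contained sketch of that pattern using only calls that appear in this patch (AcidOutputFormat.Options, OrcOutputFormat.getRecordUpdater, RecordUpdater.insert/close); the helper class, its name, and the generic row parameter are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
    import org.apache.hadoop.hive.ql.io.RecordUpdater;
    import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    final class DeltaWriteSketch {
      // Writes the given rows as inserts of transaction 1 into a delta directory under
      // tablePath, following the pattern the two tests above use with their MyRow instances.
      static void writeDelta(Configuration conf, Path tablePath,
                             ObjectInspector inspector, Object... rows) throws Exception {
        AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
            .bucket(1)
            .minimumTransactionId(1)
            .maximumTransactionId(1)
            .inspector(inspector)
            .finalDestination(tablePath);
        RecordUpdater ru = new OrcOutputFormat().getRecordUpdater(tablePath, options);
        for (Object row : rows) {
          ru.insert(options.getMinimumTransactionId(), row);
        }
        ru.close(false); // per the patch comment, closing also removes the flush-length side file
      }
    }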
assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - // NOTE: don't be surprised if deltas value is different - // in older release deltas=2 as min and max transaction are added separately to delta list. - // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); - if (split instanceof OrcSplit) { - assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" + - " orc splits.", ((OrcSplit) split).hasFooter()); - } + assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + + " orc splits.", ((OrcSplit) split).hasFooter()); orcInputFormat.getRecordReader(split, conf, Reporter.NULL); } @@ -3627,10 +3663,9 @@ public class TestInputOutputFormat { } } // call-1: open to read data - split 1 => mock:/mocktable8/0_0 - // call-2: open side file (flush length) of delta directory - // call-3: fs.exists() check for delta_xxx_xxx/bucket_00000 file - // call-4: AcidUtils.getAcidState - getLen() mock:/mocktable8/0_0 - assertEquals(4, readOpsDelta); + // call-2: split 2 - find hive.acid.key.index in footer of delta_x_y/bucket_00001 + // call-3: split 2 - read delta_x_y/bucket_00001 + assertEquals(3, readOpsDelta); // revert back to local fs conf.set("fs.defaultFS", "file:///");
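The read-op assertions throughout this file all follow one pattern: snapshot FileSystem.Statistics for the "mock" scheme, run the getSplits()/getRecordReader() calls under test, and compare. A small sketch of that pattern factored into a helper; the helper itself is illustrative and not part of the patch:

    import static org.junit.Assert.assertEquals;

    import org.apache.hadoop.fs.FileSystem;

    final class ReadOpsSketch {
      // Asserts how many read ops 'work' issued against the "mock" file system scheme.
      static void assertMockReadOps(int expected, Runnable work) {
        int before = mockReadOps();
        work.run();
        // Each side-file existence probe and each file open contributes one read op.
        assertEquals(expected, mockReadOps() - before);
      }

      private static int mockReadOps() {
        for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
          if (statistics.getScheme().equalsIgnoreCase("mock")) {
            return statistics.getReadOps();
          }
        }
        return -1;
      }
    }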