[ https://issues.apache.org/jira/browse/TEPHRA-35?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15638418#comment-15638418 ]
ASF GitHub Bot commented on TEPHRA-35: -------------------------------------- Github user anew commented on a diff in the pull request: https://github.com/apache/incubator-tephra/pull/19#discussion_r86656565 --- Diff: tephra-hbase-compat-1.1-base/src/test/java/org/apache/tephra/hbase/InvalidListPruneTest.java --- @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tephra.hbase; + +import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedMap; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.tephra.TransactionContext; +import org.apache.tephra.TransactionManager; +import org.apache.tephra.TransactionType; +import org.apache.tephra.TxConstants; +import org.apache.tephra.coprocessor.TransactionStateCache; +import org.apache.tephra.hbase.coprocessor.TransactionProcessor; +import org.apache.tephra.hbase.coprocessor.janitor.DataJanitorState; +import org.apache.tephra.inmemory.InMemoryTxSystemClient; +import org.apache.tephra.metrics.TxMetricsCollector; +import org.apache.tephra.persist.InMemoryTransactionStateStorage; +import org.apache.tephra.persist.TransactionSnapshot; +import org.apache.tephra.persist.TransactionStateStorage; +import org.apache.tephra.persist.TransactionVisibilityState; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; + +/** + * Test invalid list pruning + */ +public class InvalidListPruneTest extends AbstractHBaseTableTest { + private static final byte[] family = Bytes.toBytes("f1"); + private static final byte[] qualifier = Bytes.toBytes("col1"); + + private static TableName dataTable; + private static TableName pruneStateTable; + private static TransactionSnapshot transactionSnapshot; + + // Override AbstractHBaseTableTest.startMiniCluster to setup configuration + 
@BeforeClass + public static void startMiniCluster() throws Exception { + // Setup the configuration to start HBase cluster with the invalid list pruning enabled + conf = HBaseConfiguration.create(); + conf.setBoolean(TxConstants.DataJanitor.PRUNE_ENABLE, true); + AbstractHBaseTableTest.startMiniCluster(); + + TransactionStateStorage txStateStorage = new InMemoryTransactionStateStorage(); + TransactionManager txManager = new TransactionManager(conf, txStateStorage, new TxMetricsCollector()); + txManager.startAndWait(); + + // Do some transactional data operations + dataTable = TableName.valueOf("invalidListPruneTestTable"); + HTable hTable = createTable(dataTable.getName(), new byte[][]{family}, false, + Collections.singletonList(TestTransactionProcessor.class.getName())); + try (TransactionAwareHTable txTable = new TransactionAwareHTable(hTable, TxConstants.ConflictDetection.ROW)) { + TransactionContext txContext = new TransactionContext(new InMemoryTxSystemClient(txManager), txTable); + txContext.start(); + for(int i = 0; i < 10; ++i) { + txTable.put(new Put(Bytes.toBytes(i)).addColumn(family, qualifier, Bytes.toBytes(i))); + } + txContext.finish(); + } + + testUtil.flush(dataTable); + txManager.stopAndWait(); + + pruneStateTable = TableName.valueOf(conf.get(TxConstants.DataJanitor.PRUNE_STATE_TABLE, + TxConstants.DataJanitor.DEFAULT_PRUNE_STATE_TABLE)); + } + + @AfterClass + public static void shutdownAfterClass() throws Exception { + hBaseAdmin.disableTable(dataTable); + hBaseAdmin.deleteTable(dataTable); + } + + @Before + public void beforeTest() throws Exception { + HTable table = createTable(pruneStateTable.getName(), new byte[][]{DataJanitorState.FAMILY}, false, + // Prune state table is a non-transactional table, hence no transaction co-processor + Collections.<String>emptyList()); + table.close(); + } + + @After + public void afterTest() throws Exception { + hBaseAdmin.disableTable(pruneStateTable); + hBaseAdmin.deleteTable(pruneStateTable); + } + + @Test 
+ public void testRecordCompactionState() throws Exception { + DataJanitorState dataJanitorState = + new DataJanitorState(new DataJanitorState.TableSupplier() { + @Override + public Table get() throws IOException { + return testUtil.getConnection().getTable(pruneStateTable); + } + }); + + // No prune upper bound initially + Assert.assertEquals(-1, dataJanitorState.getPruneUpperBound(getRegionName(dataTable, Bytes.toBytes(0)))); + + // Create a new transaction snapshot + transactionSnapshot = new TransactionSnapshot(100, 100, 100, ImmutableSet.of(50L), + ImmutableSortedMap.<Long, TransactionManager.InProgressTx>of()); --- End diff -- I am not sure how this works... why does creating a snapshot change the transaction state in the coprocessor? > Prune invalid transaction set once all data for a given invalid transaction > has been dropped > -------------------------------------------------------------------------------------------- > > Key: TEPHRA-35 > URL: https://issues.apache.org/jira/browse/TEPHRA-35 > Project: Tephra > Issue Type: New Feature > Reporter: Gary Helmling > Assignee: Poorna Chandra > Priority: Blocker > Attachments: ApacheTephraAutomaticInvalidListPruning-v2.pdf > > > In addition to dropping the data from invalid transactions we need to be able > to prune the invalid set of any transactions where data cleanup has been > completely performed. Without this, the invalid set will grow indefinitely > and become a greater and greater cost to in-progress transactions over time. > To do this correctly, the TransactionDataJanitor coprocessor will need to > maintain some bookkeeping for the transaction data that it removes, so that > the transaction manager can reason about when all of a given transaction's > data has been removed. Only at this point can the transaction manager safely > drop the transaction ID from the invalid set. 
> One approach would be for the TransactionDataJanitor to update a table
> marking when a major compaction was performed on a region and what
> transaction IDs were filtered out. Once all regions in a table containing the
> transaction data have been compacted, we can remove the filtered out
> transaction IDs from the invalid set. However, this will need to cope with
> changing region names due to splits, etc.

--
This message was sent by Atlassian JIRA
(v6.3.4#6332)