[ https://issues.apache.org/jira/browse/TEPHRA-35?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15638418#comment-15638418 ]

ASF GitHub Bot commented on TEPHRA-35:
--------------------------------------

Github user anew commented on a diff in the pull request:

    https://github.com/apache/incubator-tephra/pull/19#discussion_r86656565
  
    --- Diff: tephra-hbase-compat-1.1-base/src/test/java/org/apache/tephra/hbase/InvalidListPruneTest.java ---
    @@ -0,0 +1,203 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +
    +package org.apache.tephra.hbase;
    +
    +import com.google.common.base.Supplier;
    +import com.google.common.collect.ImmutableSet;
    +import com.google.common.collect.ImmutableSortedMap;
    +import org.apache.hadoop.hbase.HBaseConfiguration;
    +import org.apache.hadoop.hbase.HRegionLocation;
    +import org.apache.hadoop.hbase.TableName;
    +import org.apache.hadoop.hbase.client.HTable;
    +import org.apache.hadoop.hbase.client.Put;
    +import org.apache.hadoop.hbase.client.Table;
    +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
    +import org.apache.hadoop.hbase.util.Bytes;
    +import org.apache.tephra.TransactionContext;
    +import org.apache.tephra.TransactionManager;
    +import org.apache.tephra.TransactionType;
    +import org.apache.tephra.TxConstants;
    +import org.apache.tephra.coprocessor.TransactionStateCache;
    +import org.apache.tephra.hbase.coprocessor.TransactionProcessor;
    +import org.apache.tephra.hbase.coprocessor.janitor.DataJanitorState;
    +import org.apache.tephra.inmemory.InMemoryTxSystemClient;
    +import org.apache.tephra.metrics.TxMetricsCollector;
    +import org.apache.tephra.persist.InMemoryTransactionStateStorage;
    +import org.apache.tephra.persist.TransactionSnapshot;
    +import org.apache.tephra.persist.TransactionStateStorage;
    +import org.apache.tephra.persist.TransactionVisibilityState;
    +import org.junit.After;
    +import org.junit.AfterClass;
    +import org.junit.Assert;
    +import org.junit.Before;
    +import org.junit.BeforeClass;
    +import org.junit.Test;
    +
    +import java.io.IOException;
    +import java.util.Collections;
    +
    +/**
    + * Test invalid list pruning
    + */
    +public class InvalidListPruneTest extends AbstractHBaseTableTest {
    +  private static final byte[] family = Bytes.toBytes("f1");
    +  private static final byte[] qualifier = Bytes.toBytes("col1");
    +
    +  private static TableName dataTable;
    +  private static TableName pruneStateTable;
    +  private static TransactionSnapshot transactionSnapshot;
    +
    +  // Override AbstractHBaseTableTest.startMiniCluster to setup configuration
    +  @BeforeClass
    +  public static void startMiniCluster() throws Exception {
    +    // Setup the configuration to start HBase cluster with the invalid list pruning enabled
    +    conf = HBaseConfiguration.create();
    +    conf.setBoolean(TxConstants.DataJanitor.PRUNE_ENABLE, true);
    +    AbstractHBaseTableTest.startMiniCluster();
    +
    +    TransactionStateStorage txStateStorage = new InMemoryTransactionStateStorage();
    +    TransactionManager txManager = new TransactionManager(conf, txStateStorage, new TxMetricsCollector());
    +    txManager.startAndWait();
    +
    +    // Do some transactional data operations
    +    dataTable = TableName.valueOf("invalidListPruneTestTable");
    +    HTable hTable = createTable(dataTable.getName(), new byte[][]{family}, false,
    +                                Collections.singletonList(TestTransactionProcessor.class.getName()));
    +    try (TransactionAwareHTable txTable = new TransactionAwareHTable(hTable, TxConstants.ConflictDetection.ROW)) {
    +      TransactionContext txContext = new TransactionContext(new InMemoryTxSystemClient(txManager), txTable);
    +      txContext.start();
    +      for(int i = 0; i < 10; ++i) {
    +        txTable.put(new Put(Bytes.toBytes(i)).addColumn(family, qualifier, Bytes.toBytes(i)));
    +      }
    +      txContext.finish();
    +    }
    +
    +    testUtil.flush(dataTable);
    +    txManager.stopAndWait();
    +
    +    pruneStateTable = TableName.valueOf(conf.get(TxConstants.DataJanitor.PRUNE_STATE_TABLE,
    +                                                 TxConstants.DataJanitor.DEFAULT_PRUNE_STATE_TABLE));
    +  }
    +
    +  @AfterClass
    +  public static void shutdownAfterClass() throws Exception {
    +    hBaseAdmin.disableTable(dataTable);
    +    hBaseAdmin.deleteTable(dataTable);
    +  }
    +
    +  @Before
    +  public void beforeTest() throws Exception {
    +    HTable table = createTable(pruneStateTable.getName(), new byte[][]{DataJanitorState.FAMILY}, false,
    +                               // Prune state table is a non-transactional table, hence no transaction co-processor
    +                               Collections.<String>emptyList());
    +    table.close();
    +  }
    +
    +  @After
    +  public void afterTest() throws Exception {
    +    hBaseAdmin.disableTable(pruneStateTable);
    +    hBaseAdmin.deleteTable(pruneStateTable);
    +  }
    +
    +  @Test
    +  public void testRecordCompactionState() throws Exception {
    +    DataJanitorState dataJanitorState =
    +      new DataJanitorState(new DataJanitorState.TableSupplier() {
    +        @Override
    +        public Table get() throws IOException {
    +          return testUtil.getConnection().getTable(pruneStateTable);
    +        }
    +      });
    +
    +    // No prune upper bound initially
    +    Assert.assertEquals(-1, dataJanitorState.getPruneUpperBound(getRegionName(dataTable, Bytes.toBytes(0))));
    +
    +    // Create a new transaction snapshot
    +    transactionSnapshot = new TransactionSnapshot(100, 100, 100, ImmutableSet.of(50L),
    +                                                  ImmutableSortedMap.<Long, TransactionManager.InProgressTx>of());
    --- End diff ---
    
    I am not sure how this works... why does creating a snapshot change the 
transaction state in the coprocessor?
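
For context on the question above: the TestTransactionProcessor inner class is not shown in this
hunk, so the following is only a guess at the wiring, inferred from the imports (Supplier,
TransactionStateCache, TransactionVisibilityState, RegionCoprocessorEnvironment). A test
coprocessor can serve the test's static transactionSnapshot field as the latest transaction
state, so whatever the test assigns to that field is what the coprocessor sees on its next
flush or compaction. Hypothetical sketch, not the actual test code:

        // Hypothetical sketch (class/method names here are assumptions, not the PR's code):
        // a TransactionProcessor subclass whose state cache returns the test's static
        // transactionSnapshot field, so the test controls the state the coprocessor uses.
        private static class TestTransactionProcessor extends TransactionProcessor {
          @Override
          protected Supplier<TransactionStateCache> getTransactionStateCacheSupplier(RegionCoprocessorEnvironment env) {
            return new Supplier<TransactionStateCache>() {
              @Override
              public TransactionStateCache get() {
                return new TransactionStateCache() {
                  @Override
                  public TransactionVisibilityState getLatestState() {
                    // Whatever the test last assigned to the static field becomes the
                    // state used for flush/compaction filtering and prune bookkeeping.
                    return transactionSnapshot;
                  }
                };
              }
            };
          }
        }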


> Prune invalid transaction set once all data for a given invalid transaction 
> has been dropped
> --------------------------------------------------------------------------------------------
>
>                 Key: TEPHRA-35
>                 URL: https://issues.apache.org/jira/browse/TEPHRA-35
>             Project: Tephra
>          Issue Type: New Feature
>            Reporter: Gary Helmling
>            Assignee: Poorna Chandra
>            Priority: Blocker
>         Attachments: ApacheTephraAutomaticInvalidListPruning-v2.pdf
>
>
> In addition to dropping the data from invalid transactions we need to be able 
> to prune the invalid set of any transactions where data cleanup has been 
> completely performed. Without this, the invalid set will grow indefinitely 
> and become a greater and greater cost to in-progress transactions over time.
> To do this correctly, the TransactionDataJanitor coprocessor will need to 
> maintain some bookkeeping for the transaction data that it removes, so that 
> the transaction manager can reason about when all of a given transaction's 
> data has been removed. Only at this point can the transaction manager safely 
> drop the transaction ID from the invalid set.
> One approach would be for the TransactionDataJanitor to update a table 
> marking when a major compaction was performed on a region and what 
> transaction IDs were filtered out. Once all regions in a table containing the 
> transaction data have been compacted, we can remove the filtered out 
> transaction IDs from the invalid set. However, this will need to cope with 
> changing region names due to splits, etc.
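
A minimal, self-contained sketch of the bookkeeping described above (hypothetical names, not the
actual DataJanitorState or coprocessor API): after a major compaction each region records the
highest invalid transaction id whose data it has fully dropped, and the invalid list can only be
pruned up to the minimum of those bounds across all live regions of the table.

    // Hypothetical, simplified illustration of the pruning decision; region splits and
    // newly created regions would need extra handling in a real implementation.
    import java.util.HashMap;
    import java.util.Map;
    import java.util.TreeSet;

    public class InvalidListPruneSketch {
      // region name -> highest invalid tx id whose data that region has dropped
      private final Map<String, Long> pruneUpperBoundPerRegion = new HashMap<>();
      // invalid transaction ids, ordered so a prefix can be pruned cheaply
      private final TreeSet<Long> invalidList = new TreeSet<>();

      void onMajorCompaction(String regionName, long pruneUpperBound) {
        // Recorded by the janitor coprocessor when a major compaction finishes.
        pruneUpperBoundPerRegion.put(regionName, pruneUpperBound);
      }

      void pruneInvalidList(Iterable<String> liveRegionsOfTable) {
        long minBound = Long.MAX_VALUE;
        for (String region : liveRegionsOfTable) {
          Long bound = pruneUpperBoundPerRegion.get(region);
          if (bound == null) {
            return;  // some region has not been major-compacted yet; cannot prune safely
          }
          minBound = Math.min(minBound, bound);
        }
        if (minBound == Long.MAX_VALUE) {
          return;  // no regions known for this table; nothing to conclude
        }
        // Every region has dropped data for all invalid transactions <= minBound,
        // so those ids can be removed from the invalid list.
        invalidList.headSet(minBound, true).clear();
      }
    }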



