[ https://issues.apache.org/jira/browse/HBASE-4676?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13573247#comment-13573247 ]
Hudson commented on HBASE-4676: ------------------------------- Integrated in HBase-TRUNK #3859 (See [https://builds.apache.org/job/HBase-TRUNK/3859/]) HBASE-4676 Prefix Compression - Trie data block encoding (Matt Corgan) (Revision 1443289) Result = ABORTED tedyu : Files : * /hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java * /hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java * /hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java * /hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java * /hbase/trunk/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java * /hbase/trunk/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java * /hbase/trunk/hbase-prefix-tree * /hbase/trunk/hbase-prefix-tree/pom.xml * /hbase/trunk/hbase-prefix-tree/src * /hbase/trunk/hbase-prefix-tree/src/main * /hbase/trunk/hbase-prefix-tree/src/main/java * /hbase/trunk/hbase-prefix-tree/src/main/java/org * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScanner.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java * /hbase/trunk/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java * /hbase/trunk/hbase-prefix-tree/src/test * /hbase/trunk/hbase-prefix-tree/src/test/java * /hbase/trunk/hbase-prefix-tree/src/test/java/org * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java * /hbase/trunk/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java * /hbase/trunk/hbase-server/pom.xml * /hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java * /hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java * /hbase/trunk/pom.xml > Prefix Compression - Trie data block encoding > --------------------------------------------- > > Key: HBASE-4676 > URL: https://issues.apache.org/jira/browse/HBASE-4676 > Project: HBase > Issue Type: New Feature > Components: io, Performance, regionserver > Affects Versions: 0.96.0 > Reporter: Matt Corgan > Assignee: Matt Corgan > Priority: Critical > Attachments: 4676-prefix-tree-trunk-v16.patch, > 4676-prefix-tree-trunk-v17.patch, HBASE-4676-0.94-v1.patch, > HBASE-4676-common-and-server-v8.patch, > HBASE-4676-prefix-tree-trunk-v10.patch, > HBASE-4676-prefix-tree-trunk-v11.patch, > HBASE-4676-prefix-tree-trunk-v12.patch, > HBASE-4676-prefix-tree-trunk-v13.patch, > HBASE-4676-prefix-tree-trunk-v13.patch, > HBASE-4676-prefix-tree-trunk-v14.patch, > HBASE-4676-prefix-tree-trunk-v15.patch, > HBASE-4676-prefix-tree-trunk-v1.patch, HBASE-4676-prefix-tree-trunk-v2.patch, > HBASE-4676-prefix-tree-trunk-v3.patch, HBASE-4676-prefix-tree-trunk-v4.patch, > HBASE-4676-prefix-tree-trunk-v5.patch, HBASE-4676-prefix-tree-trunk-v6.patch, > HBASE-4676-prefix-tree-trunk-v7.patch, HBASE-4676-prefix-tree-trunk-v8.patch, > HBASE-4676-prefix-tree-trunk-v9.patch, hbase-prefix-trie-0.1.jar, > PrefixTrie_Format_v1.pdf, PrefixTrie_Performance_v1.pdf, SeeksPerSec by > blockSize.png > > > The HBase data block format has room for 2 significant improvements for > applications that have high block cache hit ratios. > First, there is no prefix compression, and the current KeyValue format is > somewhat metadata heavy, so there can be tremendous memory bloat for many > common data layouts, specifically those with long keys and short values. > Second, there is no random access to KeyValues inside data blocks. This > means that every time you double the datablock size, average seek time (or > average cpu consumption) goes up by a factor of 2. The standard 64KB block > size is ~10x slower for random seeks than a 4KB block size, but block sizes > as small as 4KB cause problems elsewhere. Using block sizes of 256KB or 1MB > or more may be more efficient from a disk access and block-cache perspective > in many big-data applications, but doing so is infeasible from a random seek > perspective. > The PrefixTrie block encoding format attempts to solve both of these > problems. Some features: > * trie format for row key encoding completely eliminates duplicate row keys > and encodes similar row keys into a standard trie structure which also saves > a lot of space > * the column family is currently stored once at the beginning of each block. > this could easily be modified to allow multiple family names per block > * all qualifiers in the block are stored in their own trie format which > caters nicely to wide rows. duplicate qualifers between rows are eliminated. > the size of this trie determines the width of the block's qualifier > fixed-width-int > * the minimum timestamp is stored at the beginning of the block, and deltas > are calculated from that. the maximum delta determines the width of the > block's timestamp fixed-width-int > The block is structured with metadata at the beginning, then a section for > the row trie, then the column trie, then the timestamp deltas, and then then > all the values. Most work is done in the row trie, where every leaf node > (corresponding to a row) contains a list of offsets/references corresponding > to the cells in that row. Each cell is fixed-width to enable binary > searching and is represented by [1 byte operationType, X bytes qualifier > offset, X bytes timestamp delta offset]. > If all operation types are the same for a block, there will be zero per-cell > overhead. Same for timestamps. Same for qualifiers when i get a chance. > So, the compression aspect is very strong, but makes a few small sacrifices > on VarInt size to enable faster binary searches in trie fan-out nodes. > A more compressed but slower version might build on this by also applying > further (suffix, etc) compression on the trie nodes at the cost of slower > write speed. Even further compression could be obtained by using all VInts > instead of FInts with a sacrifice on random seek speed (though not huge). > One current drawback is the current write speed. While programmed with good > constructs like TreeMaps, ByteBuffers, binary searches, etc, it's not > programmed with the same level of optimization as the read path. Work will > need to be done to optimize the data structures used for encoding and could > probably show a 10x increase. It will still be slower than delta encoding, > but with a much higher decode speed. I have not yet created a thorough > benchmark for write speed nor sequential read speed. > Though the trie is reaching a point where it is internally very efficient > (probably within half or a quarter of its max read speed) the way that hbase > currently uses it is far from optimal. The KeyValueScanner and related > classes that iterate through the trie will eventually need to be smarter and > have methods to do things like skipping to the next row of results without > scanning every cell in between. When that is accomplished it will also allow > much faster compactions because the full row key will not have to be compared > as often as it is now. > Current code is on github. The trie code is in a separate project than the > slightly modified hbase. There is an hbase project there as well with the > DeltaEncoding patch applied, and it builds on top of that. > https://github.com/hotpads/hbase/tree/prefix-trie-1 > https://github.com/hotpads/hbase-prefix-trie/tree/hcell-scanners > I'll follow up later with more implementation ideas. -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators For more information on JIRA, see: http://www.atlassian.com/software/jira