Author: xedin Date: Mon Oct 31 19:33:58 2011 New Revision: 1195630 URL: http://svn.apache.org/viewvc?rev=1195630&view=rev Log: Avoid large array allocation for compressed chunk offsets patch by Sylvain Lebresne; reviewed by Pavel Yaskevich for CASSANDRA-3432
Added: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/BigLongArray.java Modified: cassandra/branches/cassandra-1.0/CHANGES.txt cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java Modified: cassandra/branches/cassandra-1.0/CHANGES.txt URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1195630&r1=1195629&r2=1195630&view=diff ============================================================================== --- cassandra/branches/cassandra-1.0/CHANGES.txt (original) +++ cassandra/branches/cassandra-1.0/CHANGES.txt Mon Oct 31 19:33:58 2011 @@ -4,7 +4,7 @@ * Add timing information to cassandra-cli GET/SET/LIST queries (CASSANDRA-3326) * Cache for CompressionMetadata objects (CASSANDRA-3427) * synchronize BiMap of bootstrapping tokens (CASSANDRA-3417) - + * Avoid large array allocation for compressed chunk offsets (CASSANDRA-3432) 1.0.1 * acquire references during index build to prevent delete problems Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java?rev=1195630&r1=1195629&r2=1195630&view=diff ============================================================================== --- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java (original) +++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressionMetadata.java Mon Oct 31 19:33:58 2011 @@ -27,6 +27,7 @@ import org.apache.cassandra.config.Confi import org.apache.cassandra.io.sstable.Component; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.util.FileUtils; +import org.apache.cassandra.utils.BigLongArray; /** * Holds metadata about compressed file @@ -35,7 +36,7 @@ public class CompressionMetadata { public final long dataLength; public final long compressedFileLength; - public final long[] chunkOffsets; + private final BigLongArray chunkOffsets; public final String indexFilePath; public final CompressionParameters parameters; @@ -136,16 +137,16 @@ public class CompressionMetadata * * @throws java.io.IOException on any I/O error (except EOF). */ - private long[] readChunkOffsets(DataInput input) throws IOException + private BigLongArray readChunkOffsets(DataInput input) throws IOException { int chunkCount = input.readInt(); - long[] offsets = new long[chunkCount]; + BigLongArray offsets = new BigLongArray(chunkCount); - for (int i = 0; i < offsets.length; i++) + for (int i = 0; i < chunkCount; i++) { try { - offsets[i] = input.readLong(); + offsets.set(i, input.readLong()); } catch (EOFException e) { @@ -171,13 +172,13 @@ public class CompressionMetadata // position of the chunk int idx = (int) (position / parameters.chunkLength()); - if (idx >= chunkOffsets.length) + if (idx >= chunkOffsets.size) throw new EOFException(); - long chunkOffset = chunkOffsets[idx]; - long nextChunkOffset = (idx + 1 == chunkOffsets.length) + long chunkOffset = chunkOffsets.get(idx); + long nextChunkOffset = (idx + 1 == chunkOffsets.size) ? compressedFileLength - : chunkOffsets[idx + 1]; + : chunkOffsets.get(idx + 1); return new Chunk(chunkOffset, (int) (nextChunkOffset - chunkOffset - 4)); // "4" bytes reserved for checksum } Added: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/BigLongArray.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/BigLongArray.java?rev=1195630&view=auto ============================================================================== --- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/BigLongArray.java (added) +++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/utils/BigLongArray.java Mon Oct 31 19:33:58 2011 @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.cassandra.utils; + +/** + * A GC friendly long[]. + * Allocating large arrays (that are not short-lived) generate fragmentation + * in old-gen space. This breaks such large long array into fixed size pages + * to avoid that problem. + */ +public class BigLongArray +{ + private static final int DEFAULT_PAGE_SIZE = 4096; + + private final long[][] pages; + public final int size; + + private final int pageSize; + private final int pageCount; + + public BigLongArray(int size) + { + this(size, DEFAULT_PAGE_SIZE); + } + + public BigLongArray(int size, int pageSize) + { + this.size = size; + this.pageSize = pageSize; + + int lastPageSize = size % pageSize; + int fullPageCount = size / pageSize; + pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1); + pages = new long[pageCount][]; + + for (int i = 0; i < fullPageCount; ++i) + pages[i] = new long[pageSize]; + + if (lastPageSize != 0) + pages[pages.length - 1] = new long[lastPageSize]; + } + + public void set(int idx, long value) + { + if (idx < 0 || idx > size) + throw new IndexOutOfBoundsException(String.format("%d is not whithin [0, %d)", idx, size)); + + int page = idx / pageSize; + int pageIdx = idx % pageSize; + pages[page][pageIdx] = value; + } + + public long get(int idx) + { + if (idx < 0 || idx > size) + throw new IndexOutOfBoundsException(String.format("%d is not whithin [0, %d)", idx, size)); + + int page = idx / pageSize; + int pageIdx = idx % pageSize; + return pages[page][pageIdx]; + } +}