Author: prasanthj Date: Sat Jan 10 01:19:32 2015 New Revision: 1650707 URL: http://svn.apache.org/r1650707 Log: experimental version of orc metadata cache
Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java Removed: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcMetadataCache.java Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java?rev=1650707&r1=1650706&r2=1650707&view=diff ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java (original) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java Sat Jan 10 01:19:32 2015 @@ -20,10 +20,7 @@ package org.apache.hadoop.hive.llap.io.e import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -38,19 +35,21 @@ import org.apache.hadoop.hive.llap.io.ap import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; import org.apache.hadoop.hive.llap.io.api.orc.OrcBatchKey; import org.apache.hadoop.hive.llap.io.api.orc.OrcCacheKey; +import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.RecordReader; import org.apache.hadoop.hive.ql.io.orc.StripeInformation; -import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; public class OrcEncodedDataProducer implements EncodedDataProducer<OrcBatchKey> { private FileSystem cachedFs = null; - private final OrcMetadataCache metadataCache = new OrcMetadataCache(); + private Configuration conf; + private OrcMetadataCache metadataCache; private final Allocator allocator; private final Cache<OrcCacheKey> cache; @@ -101,14 +100,8 @@ public class OrcEncodedDataProducer impl orcReader = null; if (stripes == null || types == null) { orcReader = createOrcReader(split); - if (stripes == null) { - stripes = orcReader.getStripes(); - metadataCache.cacheStripes(internedFilePath, stripes); - } - if (types == null) { - types = orcReader.getTypes(); - metadataCache.cacheTypes(internedFilePath, types); - } + stripes = metadataCache.getStripes(internedFilePath); + types = metadataCache.getTypes(internedFilePath); } if (columnIds == null) { @@ -298,10 +291,12 @@ public class OrcEncodedDataProducer impl private Reader createOrcReader(FileSplit fileSplit) throws IOException { FileSystem fs = cachedFs; Path path = fileSplit.getPath(); - Configuration conf = new Configuration(); if ("pfile".equals(path.toUri().getScheme())) { fs = path.getFileSystem(conf); // Cannot use cached FS due to hive tests' proxy FS. } + if (metadataCache == null) { + metadataCache = new OrcMetadataCache(cachedFs, path, conf); + } return OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs)); } @@ -315,6 +310,8 @@ public class OrcEncodedDataProducer impl this.cachedFs = FileSystem.get(conf); this.cache = cache; this.allocator = allocator; + this.conf = conf; + this.metadataCache = null; } @Override Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,119 @@ +/** + * Copyright 2014 Prasanth Jayachandran + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.io.metadata; + +/** + * + */ +public class CompressionBuffer { + // stripe position within file + private int stripePos; + + // row group position within stripe + private int rowGroupPos; + + // start offset of compression buffer corresponding to above row group + private long startOffset; + + // length of compression buffer (compressed or uncompressed length) + private long length; + + // offset within compression buffer where the row group begins + private int uncompressedOffset; + + // if uncompressedOffset is in a middle of integer encoding runs (RLE, Delta etc.), consume + // these many values to reach beginning of the row group + private int consume; + + // For run length byte encoding, record the number of bits within current byte to consume to + // reach beginning of the row group. This is required for IS_PRESENT stream. + private int consumeBits; + + // if last row group is set to true, it means the above row group spans compression buffer + // boundary. Length will span two compression buffers. + private boolean lastRowGroup; + + private CompressionBuffer(int sp, int rgp, long s, long len, int u, int c, int cb, boolean last) { + this.stripePos = sp; + this.rowGroupPos = rgp; + this.startOffset = s; + this.length = len; + this.uncompressedOffset = u; + this.consume = c; + this.consumeBits = cb; + this.lastRowGroup = last; + } + + private static class Builder { + private int stripePos; + private int rowGroupPos; + private long startOffset; + private long length; + private int offsetWithinBuffer; + private int consume; + private int consumeBits; + private boolean lastRowGroup; + + public Builder setStripePosition(int stripePos) { + this.stripePos = stripePos; + return this; + } + + public Builder setRowGroupPosition(int rowGroupPos) { + this.rowGroupPos = rowGroupPos; + return this; + } + + public Builder setStartOffset(long startOffset) { + this.startOffset = startOffset; + return this; + } + + public Builder setLength(long length) { + this.length = length; + return this; + } + + public Builder setOffsetWithInBuffer(int offsetWithInBuffer) { + this.offsetWithinBuffer = offsetWithInBuffer; + return this; + } + + public Builder consumeRuns(int consume) { + this.consume = consume; + return this; + } + + public Builder consumeBits(int consumeBits) { + this.consumeBits = consumeBits; + return this; + } + + public Builder setLastRowGroup(boolean lastRowGroup) { + this.lastRowGroup = lastRowGroup; + return this; + } + + public CompressionBuffer build() { + return new CompressionBuffer(stripePos, rowGroupPos, startOffset, length, offsetWithinBuffer, + consume, consumeBits, lastRowGroup); + } + } + + public static Builder builder() { + return new Builder(); + } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.io.metadata; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.io.orc.CompressionKind; +import org.apache.hadoop.hive.ql.io.orc.OrcProto; +import org.apache.hadoop.hive.ql.io.orc.StripeInformation; + +public class OrcMetadata { + private CompressionKind compressionKind; + private int compressionBufferSize; + private List<OrcProto.Type> types; + private List<StripeInformation> stripes; + private Map<Integer, List<OrcProto.ColumnEncoding>> stripeToColEncodings; + private Map<Integer, OrcProto.RowIndex[]> stripeToRowIndexEntries; + + public Map<Integer, List<OrcProto.ColumnEncoding>> getStripeToColEncodings() { + return stripeToColEncodings; + } + + public void setStripeToColEncodings( + Map<Integer, List<OrcProto.ColumnEncoding>> stripeToColEncodings) { + this.stripeToColEncodings = stripeToColEncodings; + } + + public Map<Integer, OrcProto.RowIndex[]> getStripeToRowIndexEntries() { + return stripeToRowIndexEntries; + } + + public void setStripeToRowIndexEntries( + Map<Integer, OrcProto.RowIndex[]> stripeToRowIndexEntries) { + this.stripeToRowIndexEntries = stripeToRowIndexEntries; + } + + public List<StripeInformation> getStripes() { + return stripes; + } + + public void setStripes(List<StripeInformation> stripes) { + this.stripes = stripes; + } + + public CompressionKind getCompressionKind() { + return compressionKind; + } + + public void setCompressionKind(CompressionKind compressionKind) { + this.compressionKind = compressionKind; + } + + public int getCompressionBufferSize() { + return compressionBufferSize; + } + + public void setCompressionBufferSize(int compressionBufferSize) { + this.compressionBufferSize = compressionBufferSize; + } + + public List<OrcProto.Type> getTypes() { + return types; + } + + public void setTypes(List<OrcProto.Type> types) { + this.types = types; + } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.io.metadata; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.orc.CompressionKind; +import org.apache.hadoop.hive.ql.io.orc.OrcProto; +import org.apache.hadoop.hive.ql.io.orc.StripeInformation; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + +/** + * ORC-specific metadata cache. + */ +public class OrcMetadataCache { + private static final int DEFAULT_CACHE_ACCESS_CONCURRENCY = 10; + private static final int DEFAULT_MAX_CACHE_ENTRIES = 100; + private static Cache<String, OrcMetadata> METADATA; + + static { + METADATA = CacheBuilder.newBuilder() + .concurrencyLevel(DEFAULT_CACHE_ACCESS_CONCURRENCY) + .maximumSize(DEFAULT_MAX_CACHE_ENTRIES) + .build(); + } + + private Path path; + private OrcMetadataLoader loader; + + public OrcMetadataCache(FileSystem fs, Path path, Configuration conf) { + this.path = path; + this.loader = new OrcMetadataLoader(fs, path, conf); + } + + public CompressionKind getCompression(String pathString) throws IOException { + try { + return METADATA.get(pathString, loader).getCompressionKind(); + } catch (ExecutionException e) { + throw new IOException("Unable to load orc metadata for " + path.toString(), e); + } + } + + public int getCompressionBufferSize(String pathString) throws IOException { + try { + return METADATA.get(pathString, loader).getCompressionBufferSize(); + } catch (ExecutionException e) { + throw new IOException("Unable to load orc metadata for " + path.toString(), e); + } + } + + public List<OrcProto.Type> getTypes(String pathString) throws IOException { + try { + return METADATA.get(pathString, loader).getTypes(); + } catch (ExecutionException e) { + throw new IOException("Unable to load orc metadata for " + path.toString(), e); + } + } + + public List<StripeInformation> getStripes(String pathString) throws IOException { + try { + return METADATA.get(pathString, loader).getStripes(); + } catch (ExecutionException e) { + throw new IOException("Unable to load orc metadata for " + path.toString(), e); + } + } + + // public boolean[] getIncludedRowGroups(String pathString, SearchArgument sarg, int stripeIdx) throws IOException { + // try { + // return METADATA.get(pathString, loader).getStripeToRowIndexEntries(); + // } catch (ExecutionException e) { + // throw new IOException("Unable to load orc metadata for " + path.toString(), e); + // } + // } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.io.metadata; + +import static org.apache.hadoop.hive.ql.io.orc.OrcFile.readerOptions; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.llap.io.orc.OrcFile; +import org.apache.hadoop.hive.llap.io.orc.Reader; +import org.apache.hadoop.hive.llap.io.orc.RecordReader; +import org.apache.hadoop.hive.ql.io.orc.OrcProto; +import org.apache.hadoop.hive.ql.io.orc.StripeInformation; + +public class OrcMetadataLoader implements Callable<OrcMetadata> { + private FileSystem fs; + private Path path; + private Configuration conf; + + public OrcMetadataLoader(FileSystem fs, Path path, Configuration conf) { + this.fs = fs; + this.path = path; + this.conf = conf; + } + + @Override + public OrcMetadata call() throws Exception { + Reader reader = OrcFile.createLLAPReader(path, readerOptions(conf).filesystem(fs)); + OrcMetadata orcMetadata = new OrcMetadata(); + orcMetadata.setCompressionKind(reader.getCompression()); + orcMetadata.setCompressionBufferSize(reader.getCompressionSize()); + List<StripeInformation> stripes = reader.getStripes(); + orcMetadata.setStripes(stripes); + Map<Integer, List<OrcProto.ColumnEncoding>> stripeColEnc = new HashMap<Integer, List<OrcProto.ColumnEncoding>>(); + Map<Integer, OrcProto.RowIndex[]> stripeRowIndices = new HashMap<Integer, OrcProto.RowIndex[]>(); + RecordReader rows = reader.rows(); + for (int i = 0; i < stripes.size(); i++) { + stripeColEnc.put(i, rows.getColumnEncodings(i)); + stripeRowIndices.put(i, rows.getRowIndexEntries(i)); + } + orcMetadata.setStripeToColEncodings(stripeColEnc); + orcMetadata.setStripeToRowIndexEntries(stripeRowIndices); + return orcMetadata; + } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,57 @@ +/** + * Copyright 2014 Prasanth Jayachandran + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.io.orc; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.llap.io.metadata.CompressionBuffer; +import org.apache.hadoop.hive.ql.io.orc.CompressionKind; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.OrcProto; +import org.apache.hadoop.hive.ql.io.orc.ReaderImpl; + +/** + * + */ +public class LLAPReaderImpl extends ReaderImpl implements Reader { + + public LLAPReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { + super(path, options); + } + + @Override + public RecordReader rows() throws IOException { + Reader.Options options = new Options(); + boolean[] include = options.getInclude(); + // if included columns is null, then include all columns + if (include == null) { + include = new boolean[footer.getTypesCount()]; + Arrays.fill(include, true); + options.include(include); + } + return new LLAPRecordReaderImpl(this.getStripes(), fileSystem, path, + options, footer.getTypesList(), codec, bufferSize, + footer.getRowIndexStride(), conf); + } + + @Override + public RecordReader rows(CompressionBuffer buffer, CompressionKind kind, + OrcProto.ColumnEncoding encoding) throws IOException { + return null; + } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.io.orc; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.llap.Consumer; +import org.apache.hadoop.hive.llap.io.api.EncodedColumn; +import org.apache.hadoop.hive.llap.io.api.cache.Allocator; +import org.apache.hadoop.hive.llap.io.api.orc.OrcBatchKey; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.io.orc.*; +import org.apache.hadoop.hive.ql.io.orc.Reader; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; + +/** + * + */ +public class LLAPRecordReaderImpl extends RecordReaderImpl implements RecordReader { + LLAPRecordReaderImpl(List<StripeInformation> stripes, + FileSystem fileSystem, Path path, + Reader.Options options, + List<OrcProto.Type> types, CompressionCodec codec, + int bufferSize, long strideRate, Configuration conf) throws IOException { + super(stripes, fileSystem, path, options, types, codec, bufferSize, strideRate, conf); + } + + @Override + public OrcProto.RowIndex[] getRowIndexEntries(int stripeIdx) throws IOException { + return readRowIndex(stripeIdx); + } + + @Override + public List<OrcProto.ColumnEncoding> getColumnEncodings(int stripeIdx) throws IOException { + StripeInformation si = stripes.get(stripeIdx); + OrcProto.StripeFooter sf = readStripeFooter(si); + return sf.getColumnsList(); + } + + @Override + public boolean[] getIncludedRowGroups(int stripeIdx) throws IOException { + currentStripe = stripeIdx; + return pickRowGroups(); + } + + @Override + public boolean hasNext() throws IOException { + return false; + } + + @Override + public Object next(Object previous) throws IOException { + return null; + } + + @Override + public VectorizedRowBatch nextBatch(VectorizedRowBatch previousBatch) throws IOException { + return null; + } + + @Override + public long getRowNumber() { + return 0; + } + + @Override + public float getProgress() { + return 0; + } + + @Override + public void close() throws IOException { + + } + + @Override + public void seekToRow(long rowCount) throws IOException { + + } + + @Override + public void readEncodedColumns(long[][] colRgs, int rgCount, SearchArgument sarg, + Consumer<EncodedColumn<OrcBatchKey>> consumer, Allocator allocator) { + + } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,31 @@ +/** + * Copyright 2014 Prasanth Jayachandran + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.io.orc; + +import java.io.IOException; + +import org.apache.hadoop.fs.Path; + +/** + * + */ +public class OrcFile { + + public static Reader createLLAPReader(Path path, + org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions options) throws IOException { + return new LLAPReaderImpl(path, options); + } +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.io.orc; + +import java.io.IOException; + +import org.apache.hadoop.hive.llap.io.metadata.CompressionBuffer; +import org.apache.hadoop.hive.ql.io.orc.CompressionKind; +import org.apache.hadoop.hive.ql.io.orc.OrcProto; + +/** + * + */ +public interface Reader extends org.apache.hadoop.hive.ql.io.orc.Reader { + + public RecordReader rows() throws IOException; + + /** + * Read rows out of given compression buffer. + * + * @param buffer - compression buffer + * @param kind - compression kind + * @param encoding - column encoding + * @return - record reader to read rows out of it + * @throws IOException + */ + public RecordReader rows(CompressionBuffer buffer, CompressionKind kind, + OrcProto.ColumnEncoding encoding) throws IOException; +} Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java?rev=1650707&view=auto ============================================================================== --- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java (added) +++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java Sat Jan 10 01:19:32 2015 @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.llap.io.orc; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hive.ql.io.orc.OrcProto; + +/** + * + */ +public interface RecordReader extends org.apache.hadoop.hive.ql.io.orc.RecordReader { + /** + * Return all row index entries for the specified stripe index. + * + * @param stripeIdx - stripe index within orc file + * @return - all row index entries + */ + OrcProto.RowIndex[] getRowIndexEntries(int stripeIdx) throws IOException; + + /** + * Return column encodings of all columns for the specified stripe index. + * + * @param stripeIdx - stripe index within orc file + * @return - column encodings of all columns + */ + List<OrcProto.ColumnEncoding> getColumnEncodings(int stripeIdx) throws IOException; + + /** + * Return the row groups that satisfy the SARG condition for the specified stripe index. + * + * @param stripeIdx - stripe index within orc file + * @return - row groups qualifying the SARG + */ + boolean[] getIncludedRowGroups(int stripeIdx) throws IOException; +} Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1650707&r1=1650706&r2=1650707&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Sat Jan 10 01:19:32 2015 @@ -23,7 +23,7 @@ import java.util.EnumSet; import javax.annotation.Nullable; -interface CompressionCodec { +public interface CompressionCodec { public enum Modifier { /* speed/compression tradeoffs */ Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1650707&r1=1650706&r2=1650707&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jan 10 01:19:32 2015 @@ -18,6 +18,14 @@ package org.apache.hadoop.hive.ql.io.orc; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT; + import java.io.IOException; import org.apache.hadoop.conf.Configuration; @@ -26,8 +34,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*; - /** * Contains factory methods to read or write ORC files. */ @@ -184,7 +190,7 @@ public final class OrcFile { private ReaderImpl.FileMetaInfo fileMetaInfo; private long maxLength = Long.MAX_VALUE; - ReaderOptions(Configuration conf) { + public ReaderOptions(Configuration conf) { this.conf = conf; } ReaderOptions fileMetaInfo(ReaderImpl.FileMetaInfo info) { Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1650707&r1=1650706&r2=1650707&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Sat Jan 10 01:19:32 2015 @@ -44,23 +44,23 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.protobuf.CodedInputStream; -final class ReaderImpl implements Reader { +public class ReaderImpl implements Reader { private static final Log LOG = LogFactory.getLog(ReaderImpl.class); private static final int DIRECTORY_SIZE_GUESS = 16 * 1024; - private final FileSystem fileSystem; - private final Path path; - private final CompressionKind compressionKind; - private final CompressionCodec codec; - private final int bufferSize; + protected final FileSystem fileSystem; + protected final Path path; + protected final CompressionKind compressionKind; + protected final CompressionCodec codec; + protected final int bufferSize; private OrcProto.Metadata metadata = null; private final int metadataSize; - private final OrcProto.Footer footer; + protected final OrcProto.Footer footer; private final ObjectInspector inspector; private long deserializedSize = -1; - private final Configuration conf; + protected final Configuration conf; private final List<Integer> versionList; private final OrcFile.WriterVersion writerVersion; @@ -295,7 +295,7 @@ final class ReaderImpl implements Reader * @param options options for reading * @throws IOException */ - ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { + public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { FileSystem fs = options.getFilesystem(); if (fs == null) { fs = path.getFileSystem(options.getConfiguration()); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1650707&r1=1650706&r2=1650707&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jan 10 01:19:32 2015 @@ -55,7 +55,6 @@ import org.apache.hadoop.hive.ql.exec.ve import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.io.orc.LlapUtils.PresentStreamReadResult; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -81,14 +80,14 @@ import org.apache.hadoop.io.Text; import com.google.common.collect.ComparisonChain; -class RecordReaderImpl implements RecordReader { +public class RecordReaderImpl implements RecordReader { private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class); private static final boolean isLogTraceEnabled = LOG.isTraceEnabled(); private final FSDataInputStream file; private final long firstRow; - private final List<StripeInformation> stripes = + protected final List<StripeInformation> stripes = new ArrayList<StripeInformation>(); private OrcProto.StripeFooter stripeFooter; private final long totalRowCount; @@ -98,7 +97,7 @@ class RecordReaderImpl implements Record private final boolean[] included; private final long rowIndexStride; private long rowInStripe = 0; - private int currentStripe = -1; + protected int currentStripe = -1; private long rowBaseInStripe = 0; private long rowCountInStripe = 0; private final Map<StreamName, InStream> streams = @@ -236,16 +235,16 @@ class RecordReaderImpl implements Record return result; } - RecordReaderImpl(List<StripeInformation> stripes, - FileSystem fileSystem, - Path path, - Reader.Options options, - List<OrcProto.Type> types, - CompressionCodec codec, - int bufferSize, - long strideRate, - Configuration conf - ) throws IOException { + protected RecordReaderImpl(List<StripeInformation> stripes, + FileSystem fileSystem, + Path path, + Reader.Options options, + List<OrcProto.Type> types, + CompressionCodec codec, + int bufferSize, + long strideRate, + Configuration conf + ) throws IOException { this.file = fileSystem.open(path); this.codec = codec; this.types = types; @@ -2271,7 +2270,7 @@ class RecordReaderImpl implements Record } } - OrcProto.StripeFooter readStripeFooter(StripeInformation stripe + protected OrcProto.StripeFooter readStripeFooter(StripeInformation stripe ) throws IOException { long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(); @@ -2568,7 +2567,7 @@ class RecordReaderImpl implements Record * row groups must be read. * @throws IOException */ - private boolean[] pickRowGroups() throws IOException { + protected boolean[] pickRowGroups() throws IOException { // if we don't have a sarg or indexes, we read everything if (sarg == null || rowIndexStride == 0) { return null; @@ -3236,7 +3235,7 @@ class RecordReaderImpl implements Record throw new IllegalArgumentException("Seek after the end of reader range"); } - OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException { + protected OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException { long offset = stripes.get(stripeIndex).getOffset(); OrcProto.StripeFooter stripeFooter; OrcProto.RowIndex[] indexes;