HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b02610 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b02610 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b02610 Branch: refs/heads/llap Commit: f8b02610d745e63e3d596d7532e84e49eedbd62e Parents: 2688b68 Author: Prasanth Jayachandran <j.prasant...@gmail.com> Authored: Wed Aug 19 11:40:52 2015 -0700 Committer: Prasanth Jayachandran <j.prasant...@gmail.com> Committed: Wed Aug 19 11:40:52 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/orc/ReaderImpl.java | 34 +++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f8b02610/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index a6448b6..c990d85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -46,12 +46,15 @@ import org.apache.hadoop.io.Text; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.protobuf.CodedInputStream; +import com.google.protobuf.InvalidProtocolBufferException; public class ReaderImpl implements Reader { private static final Log LOG = LogFactory.getLog(ReaderImpl.class); private static final int DIRECTORY_SIZE_GUESS = 16 * 1024; + private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20; // 64MB + private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB protected final FileSystem fileSystem; protected final Path path; @@ -468,7 +471,36 @@ public class ReaderImpl implements Reader { InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList( new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize); - this.metadata = OrcProto.Metadata.parseFrom(instream); + CodedInputStream in = CodedInputStream.newInstance(instream); + int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT; + OrcProto.Metadata meta = null; + do { + try { + in.setSizeLimit(msgLimit); + meta = OrcProto.Metadata.parseFrom(in); + } catch (InvalidProtocolBufferException e) { + if (e.getMessage().contains("Protocol message was too large")) { + LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" + + " size of the coded input stream." ); + + msgLimit = msgLimit << 1; + if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) { + LOG.error("Metadata section exceeds max protobuf message size of " + + PROTOBUF_MESSAGE_MAX_LIMIT + " bytes."); + throw e; + } + + // we must have failed in the middle of reading instream and instream doesn't support + // resetting the stream + instream = InStream.create("metadata", Lists.<DiskRange>newArrayList( + new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize); + in = CodedInputStream.newInstance(instream); + } else { + throw e; + } + } + } while (meta == null); + this.metadata = meta; footerBuffer.position(position + metadataSize); footerBuffer.limit(position + metadataSize + footerBufferSize);