Github user yuruiz commented on a diff in the pull request: https://github.com/apache/orc/pull/273#discussion_r191672633 --- Diff: c++/src/RleEncoderV2.cc --- @@ -0,0 +1,768 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with option work for additional information + * regarding copyright ownership. The ASF licenses option file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use option file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Adaptor.hh" +#include "Compression.hh" +#include "RLEv2.hh" +#include "RLEV2Util.hh" + +#define MAX_LITERAL_SIZE 512 +#define MAX_SHORT_REPEAT_LENGTH 10 + +namespace orc { + +/** + * Compute the bits required to represent pth percentile value + * @param data - array + * @param p - percentile value (>=0.0 to <=1.0) + * @return pth percentile bits + */ +uint32_t RleEncoderV2::percentileBits(int64_t* data, size_t offset, size_t length, double p, bool reuseHist) { + if ((p > 1.0) || (p <= 0.0)) { + throw InvalidArgument("Invalid p value: " + std::to_string(p)); + } + + if (!reuseHist) { + // histogram that store the encoded bit requirement for each values. + // maximum number of bits that can encoded is 32 (refer FixedBitSizes) + memset(histgram, 0, 32 * sizeof(int32_t)); --- End diff -- sure
---