Repository: hbase Updated Branches: refs/heads/HBASE-14850 3fa0acfb8 -> 9aa4b16b7
HBASE-17278 [C++] Cell Scanner and KeyValueCodec for encoding cells in RPC (Sudeep Sunthankar and enis) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/9aa4b16b Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/9aa4b16b Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/9aa4b16b Branch: refs/heads/HBASE-14850 Commit: 9aa4b16b746efbb85c47d65cc25999444aa7a054 Parents: 3fa0acf Author: Enis Soztutar <e...@apache.org> Authored: Thu Feb 9 11:26:47 2017 -0800 Committer: Enis Soztutar <e...@apache.org> Committed: Thu Feb 9 11:26:58 2017 -0800 ---------------------------------------------------------------------- hbase-native-client/.gitignore | 3 +- hbase-native-client/core/BUCK | 2 + hbase-native-client/core/keyvalue-codec.cc | 69 ++++++++++ hbase-native-client/core/keyvalue-codec.h | 147 +++++++++++++++++++++ hbase-native-client/serde/BUCK | 3 + hbase-native-client/serde/cell-outputstream.h | 53 ++++++++ hbase-native-client/serde/cell-scanner.h | 50 +++++++ hbase-native-client/serde/codec.h | 50 +++++++ 8 files changed, 376 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/.gitignore ---------------------------------------------------------------------- diff --git a/hbase-native-client/.gitignore b/hbase-native-client/.gitignore index b172fe9..c0032e4 100644 --- a/hbase-native-client/.gitignore +++ b/hbase-native-client/.gitignore @@ -22,4 +22,5 @@ buck-out *.swp # Thirdparty dirs -third-party/googletest* +third-party/* +/gcc-debug/ http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/core/BUCK ---------------------------------------------------------------------- diff --git a/hbase-native-client/core/BUCK b/hbase-native-client/core/BUCK index f1880a4..d8d15a9 100644 --- a/hbase-native-client/core/BUCK +++ b/hbase-native-client/core/BUCK 
@@ -22,6 +22,7 @@ cxx_library( "client.h", "cell.h", "hbase_macros.h", + "keyvalue-codec.h", "region-location.h", "location-cache.h", # TODO: move this out of exported @@ -40,6 +41,7 @@ cxx_library( srcs=[ "cell.cc", "client.cc", + "keyvalue-codec.cc", "location-cache.cc", "meta-utils.cc", "get.cc", http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/core/keyvalue-codec.cc ---------------------------------------------------------------------- diff --git a/hbase-native-client/core/keyvalue-codec.cc b/hbase-native-client/core/keyvalue-codec.cc new file mode 100644 index 0000000..1b526b9 --- /dev/null +++ b/hbase-native-client/core/keyvalue-codec.cc @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "core/keyvalue-codec.h" + +#include <string> + +namespace hbase { +/* Takes ownership of cell_block and records the offset and length passed in by the caller. */ +KeyValueCodec::KVDecoder::KVDecoder(std::unique_ptr<folly::IOBuf> cell_block, uint32_t offset, + uint32_t length) + : cell_block_(std::move(cell_block)), offset_(offset), length_(length) {} + +KeyValueCodec::KVDecoder::~KVDecoder() {} +/* Reads one cell at the cursor, all integers big-endian: key length (4 bytes), value length (4 bytes), row length (2 bytes) + row, family length (1 byte) + family, qualifier, timestamp (8 bytes), key type (1 byte), value. The qualifier has no length field of its own; its length is the key length minus the row, the family and the fixed-size key fields. */ +std::shared_ptr<Cell> KeyValueCodec::KVDecoder::Decode(folly::io::Cursor &cursor) { + uint32_t key_length = cursor.readBE<uint32_t>(); + uint32_t value_length = cursor.readBE<uint32_t>(); + uint16_t row_length = cursor.readBE<uint16_t>(); + std::string row = cursor.readFixedString(row_length); + uint8_t column_family_length = cursor.readBE<uint8_t>(); + std::string column_family = cursor.readFixedString(column_family_length); + int qualifier_length = + key_length - (row_length + column_family_length + kHBaseSizeOfKeyInfrastructure_); + std::string column_qualifier = cursor.readFixedString(qualifier_length); + uint64_t timestamp = cursor.readBE<uint64_t>(); + uint8_t key_type = cursor.readBE<uint8_t>(); + std::string value = cursor.readFixedString(value_length); + + return std::make_shared<Cell>(row, column_family, column_qualifier, timestamp, value, + static_cast<hbase::CellType>(key_type)); +} +/* Decodes the next cell of the block into current_cell_. Returns true when a cell was decoded and false once the block is exhausted; after the first false every later call also returns false. */ +bool KeyValueCodec::KVDecoder::Advance() { + if (end_of_cell_block_) { + return false; + } + /* ">=" rather than "==": a size prefix that overshoots the block moves cur_pos_ past length_, and later calls must stop instead of decoding bytes outside the block. */ + if (cur_pos_ >= length_) { + end_of_cell_block_ = true; + return false; + } + /* cur_pos_ is relative to the start of the block, which lies offset_ bytes into the buffer. */ + folly::io::Cursor cursor(cell_block_.get()); + cursor.skip(offset_ + cur_pos_); + uint32_t current_cell_size = cursor.readBE<uint32_t>(); + current_cell_ = Decode(cursor); + cur_pos_ += kHBaseSizeOfInt_ + current_cell_size; /* 4-byte size prefix + the cell it announces */ + return true; +} + +uint32_t KeyValueCodec::KVDecoder::CellBlockLength() const { return length_; } +} /* namespace hbase */ http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/core/keyvalue-codec.h ---------------------------------------------------------------------- diff --git a/hbase-native-client/core/keyvalue-codec.h 
b/hbase-native-client/core/keyvalue-codec.h new file mode 100644 index 0000000..fd58346 --- /dev/null +++ b/hbase-native-client/core/keyvalue-codec.h @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include <folly/io/Cursor.h> +#include <folly/io/IOBuf.h> +#include <memory> + +#include "core/cell.h" +#include "serde/codec.h" + +namespace hbase { + +/** + * @brief Codec for a sequence of Cells, based on org.apache.hadoop.hbase.codec.KeyValueCodec (Java) + * + * KeyValueCodec implements the Codec interface. Its Decoder is a CellScanner over a cell_block. + * The public methods CreateEncoder and CreateDecoder return the Encoder/Decoder + * instances; the Decoder is used to obtain the individual cells in cell_block. 
+ * Usage:- + * 1) Cell Decoding:- + * unique_ptr<CellScanner> cell_scanner = KeyValueCodec().CreateDecoder(std::move(cell_block), + * cb_start_offset, cb_length); + * while (cell_scanner->Advance()) { + * auto current_cell = cell_scanner->Current(); + * } + */ +class KeyValueCodec : public Codec { + public: + /** + * Constructor + */ + KeyValueCodec() {} + + std::unique_ptr<Codec::Encoder> CreateEncoder() override { return std::make_unique<KVEncoder>(); } + std::unique_ptr<Codec::Decoder> CreateDecoder(std::unique_ptr<folly::IOBuf> cell_block, + uint32_t offset, uint32_t length) override { + return std::make_unique<KVDecoder>(std::move(cell_block), offset, length); + } + + /** @brief returns the java class name corresponding to this Codec implementation */ + virtual const char* java_class_name() const override { return kJavaClassName; } + + static constexpr const char* kJavaClassName = "org.apache.hadoop.hbase.codec.KeyValueCodec"; + + private: + class KVEncoder : public Codec::Encoder { /* Encoder stub: Write() and Flush() have empty bodies for now. */ + public: + KVEncoder() {} + + void Write(const Cell& cell) override { + // TODO: Encode Cells using KeyValueCodec wire format + } + + void Flush() override {} + }; + /** Decoder over one cell block; this is what CreateDecoder returns. */ + class KVDecoder : public Codec::Decoder { + public: + KVDecoder(std::unique_ptr<folly::IOBuf> cell_block, uint32_t cell_block_start_offset, + uint32_t cell_block_length); + ~KVDecoder(); + + /** + * @brief Overridden from CellScanner. This method parses cell_block and stores the current cell in + * current_cell_. 
The current cell can then be obtained using cell_scanner->Current(). + */ + bool Advance() override; + + /** + * @brief returns the current cell (nullptr until Advance() has decoded one) + */ + const std::shared_ptr<Cell> Current() const override { return current_cell_; } + + /** + * @brief returns the total length of the cell block, as passed to the constructor + */ + uint32_t CellBlockLength() const; + + private: + std::shared_ptr<Cell> Decode(folly::io::Cursor& cursor); + + /** + * Size of boolean in bytes + */ + const int kHBaseSizeOfBoolean_ = sizeof(uint8_t) / sizeof(uint8_t); + + /** + * Size of byte in bytes + */ + const uint8_t kHBaseSizeOfByte_ = kHBaseSizeOfBoolean_; + + /** + * Size of int in bytes + */ + const uint32_t kHBaseSizeOfInt_ = sizeof(uint32_t) / kHBaseSizeOfByte_; + + /** + * Size of long in bytes + */ + const uint64_t kHBaseSizeOfLong_ = sizeof(uint64_t) / kHBaseSizeOfByte_; + + /** + * Size of Short in bytes + */ + const uint16_t kHBaseSizeOfShort_ = sizeof(uint16_t) / kHBaseSizeOfByte_; + /* Sizes of the fixed-width fields of an encoded KeyValue, built from the sizes above. */ + const uint32_t kHBaseSizeOfKeyLength_ = kHBaseSizeOfInt_; + const uint32_t kHBaseSizeOfValueLength_ = kHBaseSizeOfInt_; + const uint16_t kHBaseSizeOfRowLength_ = kHBaseSizeOfShort_; + const uint8_t kHBaseSizeOfFamilyLength_ = kHBaseSizeOfByte_; + const uint64_t kHBaseSizeOfTimestamp_ = kHBaseSizeOfLong_; + const uint8_t kHBaseSizeOfKeyType_ = kHBaseSizeOfByte_; + const uint32_t kHBaseSizeOfTimestampAndKey_ = kHBaseSizeOfTimestamp_ + kHBaseSizeOfKeyType_; + const uint32_t kHBaseSizeOfKeyInfrastructure_ = + kHBaseSizeOfRowLength_ + kHBaseSizeOfFamilyLength_ + kHBaseSizeOfTimestampAndKey_; + const uint32_t kHBaseSizeOfKeyValueInfrastructure_ = + kHBaseSizeOfKeyLength_ + kHBaseSizeOfValueLength_; + /* Decoder state: the owned buffer, the block window inside it, and the read position relative to the window start. */ + std::unique_ptr<folly::IOBuf> cell_block_ = nullptr; + uint32_t offset_ = 0; + uint32_t length_ = 0; + uint32_t cur_pos_ = 0; + bool end_of_cell_block_ = false; + + std::shared_ptr<Cell> current_cell_ = nullptr; + }; +}; + +} /* namespace hbase */ http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/serde/BUCK 
---------------------------------------------------------------------- diff --git a/hbase-native-client/serde/BUCK b/hbase-native-client/serde/BUCK index b5aad3d..c6809a8 100644 --- a/hbase-native-client/serde/BUCK +++ b/hbase-native-client/serde/BUCK @@ -18,6 +18,9 @@ cxx_library( name="serde", exported_headers=[ + "cell-scanner.h", + "cell-outputstream.h", + "codec.h", "region-info.h", "rpc.h", "server-name.h", http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/serde/cell-outputstream.h ---------------------------------------------------------------------- diff --git a/hbase-native-client/serde/cell-outputstream.h b/hbase-native-client/serde/cell-outputstream.h new file mode 100644 index 0000000..963dd31 --- /dev/null +++ b/hbase-native-client/serde/cell-outputstream.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include <memory> + +namespace hbase { + +class Cell; + +/** + * @brief Interface for writing a sequence of Cells to an output (Write each cell, then Flush). + */ +class CellOutputStream { + public: + virtual ~CellOutputStream() {} + + /** + * Implementation must copy the entire state of the Cell. 
If the written Cell is modified + * immediately after Write() returns, the modifications must have absolutely no effect + * on the copy of the Cell that was added in the write. + * @param cell Cell to write out + * @throws IOException (wording presumably carried over from the Java interface; TODO confirm for C++) + */ + virtual void Write(const Cell& cell) = 0; + + /** + * Let the implementation decide what to do. Usually means writing accumulated data into a + * byte buffer that can then be read from the implementation to be sent to disk, put in the block + * cache, or sent over the network. + * @throws IOException (wording presumably carried over from the Java interface; TODO confirm for C++) + */ + virtual void Flush() = 0; +}; + +} /* namespace hbase */ http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/serde/cell-scanner.h ---------------------------------------------------------------------- diff --git a/hbase-native-client/serde/cell-scanner.h b/hbase-native-client/serde/cell-scanner.h new file mode 100644 index 0000000..fe4a249 --- /dev/null +++ b/hbase-native-client/serde/cell-scanner.h @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#pragma once +#include <folly/io/IOBuf.h> +#include <memory> + +namespace hbase { + +class Cell; + +/** + * @brief Interface for iterating over a sequence of Cells + */ +class CellScanner { + public: + virtual ~CellScanner() {} + + /** + * @brief Advances to the next cell; iteration continues for as long as this returns true. + * Typical usage will be :- + * while (cell_scanner.Advance()) { + * auto current_cell = cell_scanner.Current(); + * } + */ + virtual bool Advance() = 0; + + /** + * @brief returns the current cell, i.e. the one reached by the latest successful Advance() + */ + virtual const std::shared_ptr<Cell> Current() const = 0; +}; + +} /* namespace hbase */ http://git-wip-us.apache.org/repos/asf/hbase/blob/9aa4b16b/hbase-native-client/serde/codec.h ---------------------------------------------------------------------- diff --git a/hbase-native-client/serde/codec.h b/hbase-native-client/serde/codec.h new file mode 100644 index 0000000..64807dc --- /dev/null +++ b/hbase-native-client/serde/codec.h @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include <folly/io/IOBuf.h> +#include <memory> + +#include "serde/cell-outputstream.h" +#include "serde/cell-scanner.h" + +namespace hbase { + +/** + * @brief Encoder / Decoder for Cells. 
+ */ +class Codec { + public: + virtual ~Codec() {} + /* Encoder is a CellOutputStream (Write/Flush); Decoder is a CellScanner (Advance/Current). */ + class Encoder : public CellOutputStream {}; + + class Decoder : public CellScanner {}; + /* Factory methods: CreateDecoder takes ownership of cell_block plus the start offset and length of the cell data in it. */ + virtual std::unique_ptr<Encoder> CreateEncoder() = 0; + virtual std::unique_ptr<Decoder> CreateDecoder(std::unique_ptr<folly::IOBuf> cell_block, + uint32_t cell_block_start_offset, + uint32_t cell_block_length) = 0; + + /** @brief returns the name of the Java class that corresponds to this Codec implementation */ + virtual const char* java_class_name() const = 0; +}; + +} /* namespace hbase */