wgtmac commented on code in PR #624: URL: https://github.com/apache/iceberg-cpp/pull/624#discussion_r3287421415
########## src/iceberg/puffin/puffin_reader.h: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_reader.h +/// Puffin file reader. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <span> +#include <utility> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class InputFile; +class SeekableInputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Reader for Puffin files. +/// +/// Supports two modes: +/// - Memory mode: parses from an in-memory buffer. +/// - File mode: reads from an InputFile with seek support (for DV use case). +class ICEBERG_DATA_EXPORT PuffinReader { + public: + /// \brief Construct a memory-mode reader from file data. + explicit PuffinReader(std::span<const std::byte> data); Review Comment: Let's remove the api for pure in-memory data which distracts and complicates its implementation. ########## src/iceberg/puffin/puffin_writer.h: ########## @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_writer.h +/// Puffin file writer. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class OutputFile; +class PositionOutputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Writer for Puffin files. +/// +/// Supports two modes: +/// - Stream mode: writes directly to an OutputFile/PositionOutputStream. +/// - Memory mode: builds the file in an internal buffer and returns it via Finish(). +class ICEBERG_DATA_EXPORT PuffinWriter { + public: + /// \brief Construct a memory-mode writer. Review Comment: Let's remove this use case not to complicate the implementation. ########## src/iceberg/puffin/puffin_writer.h: ########## @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_writer.h +/// Puffin file writer. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class OutputFile; +class PositionOutputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Writer for Puffin files. +/// +/// Supports two modes: +/// - Stream mode: writes directly to an OutputFile/PositionOutputStream. +/// - Memory mode: builds the file in an internal buffer and returns it via Finish(). +class ICEBERG_DATA_EXPORT PuffinWriter { + public: + /// \brief Construct a memory-mode writer. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + /// \brief Construct a stream-mode writer from an OutputFile. + /// \param output_file The output file to write to. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unique_ptr<OutputFile> output_file, + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); Review Comment: Why there is no `bool compressFooter` parameter? ########## src/iceberg/puffin/puffin_writer.h: ########## @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_writer.h +/// Puffin file writer. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class OutputFile; +class PositionOutputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Writer for Puffin files. +/// +/// Supports two modes: +/// - Stream mode: writes directly to an OutputFile/PositionOutputStream. +/// - Memory mode: builds the file in an internal buffer and returns it via Finish(). +class ICEBERG_DATA_EXPORT PuffinWriter { + public: + /// \brief Construct a memory-mode writer. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + /// \brief Construct a stream-mode writer from an OutputFile. + /// \param output_file The output file to write to. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unique_ptr<OutputFile> output_file, + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + ~PuffinWriter(); + + /// \brief Add a blob to be written. + Status Add(const Blob& blob); + + /// \brief Finalize the file. + /// + /// In memory mode, returns the complete serialized file bytes. + /// In stream mode, flushes and closes the stream, returns empty vector. + Result<std::vector<std::byte>> Finish(); + + /// \brief Get metadata for all blobs written so far. + const std::vector<BlobMetadata>& written_blobs_metadata() const; + + /// \brief Get the footer size after Finish() has been called. + std::optional<int64_t> footer_size() const; + + /// \brief Get the total file size after Finish() has been called. + std::optional<int64_t> file_size() const; Review Comment: ditto ########## src/iceberg/puffin/puffin_writer.h: ########## @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_writer.h +/// Puffin file writer. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class OutputFile; +class PositionOutputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Writer for Puffin files. +/// +/// Supports two modes: +/// - Stream mode: writes directly to an OutputFile/PositionOutputStream. +/// - Memory mode: builds the file in an internal buffer and returns it via Finish(). +class ICEBERG_DATA_EXPORT PuffinWriter { + public: + /// \brief Construct a memory-mode writer. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + /// \brief Construct a stream-mode writer from an OutputFile. + /// \param output_file The output file to write to. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unique_ptr<OutputFile> output_file, + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + ~PuffinWriter(); + + /// \brief Add a blob to be written. + Status Add(const Blob& blob); Review Comment: Why there is no `Result<BlobMetadata> Write(const Blob& blob)`? ########## src/iceberg/puffin/puffin_writer.h: ########## @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_writer.h +/// Puffin file writer. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class OutputFile; +class PositionOutputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Writer for Puffin files. +/// +/// Supports two modes: +/// - Stream mode: writes directly to an OutputFile/PositionOutputStream. +/// - Memory mode: builds the file in an internal buffer and returns it via Finish(). +class ICEBERG_DATA_EXPORT PuffinWriter { + public: + /// \brief Construct a memory-mode writer. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + /// \brief Construct a stream-mode writer from an OutputFile. + /// \param output_file The output file to write to. + /// \param properties File-level properties to include in the footer. + /// \param default_codec Default compression codec for blobs. + explicit PuffinWriter( + std::unique_ptr<OutputFile> output_file, + std::unordered_map<std::string, std::string> properties = {}, + PuffinCompressionCodec default_codec = PuffinCompressionCodec::kNone); + + ~PuffinWriter(); + + /// \brief Add a blob to be written. + Status Add(const Blob& blob); + + /// \brief Finalize the file. + /// + /// In memory mode, returns the complete serialized file bytes. + /// In stream mode, flushes and closes the stream, returns empty vector. + Result<std::vector<std::byte>> Finish(); + + /// \brief Get metadata for all blobs written so far. + const std::vector<BlobMetadata>& written_blobs_metadata() const; + + /// \brief Get the footer size after Finish() has been called. + std::optional<int64_t> footer_size() const; Review Comment: Return `Result<int64_t>` to emit unclosed error. ########## src/iceberg/puffin/puffin_reader.h: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_reader.h +/// Puffin file reader. + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <span> +#include <utility> +#include <vector> + +#include "iceberg/iceberg_data_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg { +class InputFile; +class SeekableInputStream; +} // namespace iceberg + +namespace iceberg::puffin { + +/// \brief Reader for Puffin files. +/// +/// Supports two modes: +/// - Memory mode: parses from an in-memory buffer. +/// - File mode: reads from an InputFile with seek support (for DV use case). +class ICEBERG_DATA_EXPORT PuffinReader { + public: + /// \brief Construct a memory-mode reader from file data. + explicit PuffinReader(std::span<const std::byte> data); + + /// \brief Construct a file-mode reader from an InputFile. + explicit PuffinReader(std::unique_ptr<InputFile> input_file); Review Comment: We cannot throw exception in practice, so we may need a static function like `Result<std::unique_ptr<PuffinReader>> Make(std::unique_ptr<InputFile> input_file)` to handle any error, like null input. Same for writer. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
