paleolimbot commented on code in PR #585: URL: https://github.com/apache/arrow-nanoarrow/pull/585#discussion_r1717654952
########## src/nanoarrow/integration/ipc_integration.cc: ########## @@ -0,0 +1,373 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstdlib> +#include <fstream> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> +#include <nanoarrow/nanoarrow_ipc.hpp> +#include <nanoarrow/nanoarrow_testing.hpp> + +std::string GetEnv(char const* name) { + char const* val = std::getenv(name); + return val ? val : ""; +} + +constexpr auto kUsage = R"(USAGE: + # assert that f.arrow reads identical to f.json + env COMMAND=VALIDATE \ + ARROW_PATH=f.arrow \ + JSON_PATH=f.json \ + nanoarrow_ipc_json_integration + + # produce f.arrow from f.json + env COMMAND=JSON_TO_ARROW \ + ARROW_PATH=f.arrow \ + JSON_PATH=f.json \ + nanoarrow_ipc_json_integration + + # copy f.stream into f.arrow + env COMMAND=STREAM_TO_FILE \ + ARROW_PATH=f.arrow \ + nanoarrow_ipc_json_integration < f.stream + + # copy f.arrow into f.stream + env COMMAND=FILE_TO_STREAM \ + ARROW_PATH=f.arrow \ + nanoarrow_ipc_json_integration > f.stream + + # run all internal test cases + nanoarrow_ipc_json_integration +)"; + +ArrowErrorCode Validate(struct ArrowError*); +ArrowErrorCode JsonToArrow(struct ArrowError*); +ArrowErrorCode StreamToFile(struct ArrowError*); +ArrowErrorCode FileToStream(struct ArrowError*); + +int main(int argc, char** argv) try { + std::string command = GetEnv("COMMAND"); + + ArrowErrorCode error_code; + struct ArrowError error; + + if (command == "VALIDATE") { + std::cout << "Validating that " << GetEnv("ARROW_PATH") << " reads identical to " + << GetEnv("JSON_PATH") << std::endl; + + error_code = Validate(&error); + } else if (command == "JSON_TO_ARROW") { + std::cout << "Producing " << GetEnv("ARROW_PATH") << " from " << GetEnv("JSON_PATH") + << std::endl; + + error_code = JsonToArrow(&error); + } else if (command == "STREAM_TO_FILE") { + error_code = StreamToFile(&error); + } else if (command == "FILE_TO_STREAM") { + error_code = FileToStream(&error); + } else { + if (argc == 1 || argv[1] == std::string{"-h"} || argv[1] == std::string{"--help"}) { + // skip printing usage if for example --gtest_list_tests is used; + // that command obviously doesn't need the extra noise + std::cerr << kUsage; + } + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); + } + + if (error_code != NANOARROW_OK) { + std::cerr << "Command " << command << " failed (" << error_code << "=" + << strerror(error_code) << "): " << error.message << std::endl; + } + return error_code; +} catch (std::exception const& e) { + std::cerr << "Uncaught exception: " << e.what() << std::endl; + return 1; +} + +struct File { Review Comment: No need to do it in this PR, but eventually we can consolidate/test the things that appear in both the C Data integration and IPC integration tests into nanoarrowarrow_testing ########## src/nanoarrow/integration/ipc_integration.cc: ########## @@ -0,0 +1,373 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstdlib> +#include <fstream> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> +#include <nanoarrow/nanoarrow_ipc.hpp> +#include <nanoarrow/nanoarrow_testing.hpp> + +std::string GetEnv(char const* name) { + char const* val = std::getenv(name); + return val ? val : ""; +} + +constexpr auto kUsage = R"(USAGE: + # assert that f.arrow reads identical to f.json + env COMMAND=VALIDATE \ + ARROW_PATH=f.arrow \ + JSON_PATH=f.json \ + nanoarrow_ipc_json_integration + + # produce f.arrow from f.json + env COMMAND=JSON_TO_ARROW \ + ARROW_PATH=f.arrow \ + JSON_PATH=f.json \ + nanoarrow_ipc_json_integration + + # copy f.stream into f.arrow + env COMMAND=STREAM_TO_FILE \ + ARROW_PATH=f.arrow \ + nanoarrow_ipc_json_integration < f.stream + + # copy f.arrow into f.stream + env COMMAND=FILE_TO_STREAM \ + ARROW_PATH=f.arrow \ + nanoarrow_ipc_json_integration > f.stream + + # run all internal test cases + nanoarrow_ipc_json_integration +)"; + +ArrowErrorCode Validate(struct ArrowError*); +ArrowErrorCode JsonToArrow(struct ArrowError*); +ArrowErrorCode StreamToFile(struct ArrowError*); +ArrowErrorCode FileToStream(struct ArrowError*); + +int main(int argc, char** argv) try { + std::string command = GetEnv("COMMAND"); + + ArrowErrorCode error_code; + struct ArrowError error; + + if (command == "VALIDATE") { + std::cout << "Validating that " << GetEnv("ARROW_PATH") << " reads identical to " + << GetEnv("JSON_PATH") << std::endl; + + error_code = Validate(&error); + } else if (command == "JSON_TO_ARROW") { + std::cout << "Producing " << GetEnv("ARROW_PATH") << " from " << GetEnv("JSON_PATH") + << std::endl; + + error_code = JsonToArrow(&error); + } else if (command == "STREAM_TO_FILE") { + error_code = StreamToFile(&error); + } else if (command == "FILE_TO_STREAM") { + error_code = FileToStream(&error); + } else { + if (argc == 1 || argv[1] == std::string{"-h"} || argv[1] == std::string{"--help"}) { + // skip printing usage if for example --gtest_list_tests is used; + // that command obviously doesn't need the extra noise + std::cerr << kUsage; + } + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); Review Comment: Is there any way to separate this in such a way that we're not using the same executable for both running tests and as the artifact needed for the integration test setup? Maybe a define to separate the two cases if a separate file is too hard? (No need to solve if this is time consuming!) ########## src/nanoarrow/integration/ipc_integration.cc: ########## @@ -0,0 +1,373 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstdlib> +#include <fstream> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> +#include <nanoarrow/nanoarrow_ipc.hpp> +#include <nanoarrow/nanoarrow_testing.hpp> + +std::string GetEnv(char const* name) { + char const* val = std::getenv(name); + return val ? val : ""; +} + +constexpr auto kUsage = R"(USAGE: + # assert that f.arrow reads identical to f.json + env COMMAND=VALIDATE \ + ARROW_PATH=f.arrow \ + JSON_PATH=f.json \ + nanoarrow_ipc_json_integration + + # produce f.arrow from f.json + env COMMAND=JSON_TO_ARROW \ + ARROW_PATH=f.arrow \ + JSON_PATH=f.json \ + nanoarrow_ipc_json_integration + + # copy f.stream into f.arrow + env COMMAND=STREAM_TO_FILE \ + ARROW_PATH=f.arrow \ + nanoarrow_ipc_json_integration < f.stream + + # copy f.arrow into f.stream + env COMMAND=FILE_TO_STREAM \ + ARROW_PATH=f.arrow \ + nanoarrow_ipc_json_integration > f.stream + + # run all internal test cases + nanoarrow_ipc_json_integration +)"; + +ArrowErrorCode Validate(struct ArrowError*); +ArrowErrorCode JsonToArrow(struct ArrowError*); +ArrowErrorCode StreamToFile(struct ArrowError*); +ArrowErrorCode FileToStream(struct ArrowError*); + +int main(int argc, char** argv) try { + std::string command = GetEnv("COMMAND"); + + ArrowErrorCode error_code; + struct ArrowError error; + + if (command == "VALIDATE") { + std::cout << "Validating that " << GetEnv("ARROW_PATH") << " reads identical to " + << GetEnv("JSON_PATH") << std::endl; + + error_code = Validate(&error); + } else if (command == "JSON_TO_ARROW") { + std::cout << "Producing " << GetEnv("ARROW_PATH") << " from " << GetEnv("JSON_PATH") + << std::endl; + + error_code = JsonToArrow(&error); + } else if (command == "STREAM_TO_FILE") { + error_code = StreamToFile(&error); + } else if (command == "FILE_TO_STREAM") { + error_code = FileToStream(&error); + } else { + if (argc == 1 || argv[1] == std::string{"-h"} || argv[1] == std::string{"--help"}) { + // skip printing usage if for example --gtest_list_tests is used; + // that command obviously doesn't need the extra noise + std::cerr << kUsage; + } + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); + } + + if (error_code != NANOARROW_OK) { + std::cerr << "Command " << command << " failed (" << error_code << "=" + << strerror(error_code) << "): " << error.message << std::endl; + } + return error_code; +} catch (std::exception const& e) { + std::cerr << "Uncaught exception: " << e.what() << std::endl; + return 1; +} + +struct File { + ~File() { + if (file_ != nullptr) { + fclose(file_); + } + } + + ArrowErrorCode open(std::string path, std::string mode, struct ArrowError* error) { + file_ = fopen(path.c_str(), mode.c_str()); + if (file_ != nullptr) { + return NANOARROW_OK; + } + ArrowErrorSet(error, "Opening file '%s' failed with errno=%d", path.c_str(), errno); + return EINVAL; + } + + std::string read() const { + fseek(file_, 0, SEEK_END); + std::string contents(ftell(file_), '\0'); + rewind(file_); + + size_t bytes_read = 0; + while (bytes_read < contents.size()) { + bytes_read += + fread(contents.data() + bytes_read, 1, contents.size() - bytes_read, file_); + } + return contents; + } Review Comment: ```suggestion std::string read() const { fseek(file_, 0, SEEK_END); std::vector<uint8_t> contents(ftell(file_), '\0'); rewind(file_); size_t bytes_read = 0; while (bytes_read < contents.size()) { bytes_read += fread(contents.data() + bytes_read, 1, contents.size() - bytes_read, file_); } return std::string(contents.begin(), contents.end()); } ``` (This bit fails to compile for me...mabye `std::string::data` not being writable before C++17?) ``` [build] /Users/deweydunnington/Desktop/rscratch/arrow-nanoarrow/src/nanoarrow/integration/ipc_integration.cc:127:11: error: no matching function for call to 'fread' [build] fread(contents.data() + bytes_read, 1, contents.size() - bytes_read, file_); [build] ^~~~~ [build] /Library/Developer/CommandLineTools/SDKs/MacOSX14.4.sdk/usr/include/stdio.h:150:9: note: candidate function not viable: no known conversion from 'const value_type *' (aka 'const char *') to 'void *' for 1st argument [build] size_t fread(void * __restrict __ptr, size_t __size, size_t __nitems, FILE * __restrict __stream); [build] ^ ``` ########## src/nanoarrow/nanoarrow_ipc.h: ########## @@ -417,6 +417,36 @@ ArrowErrorCode ArrowIpcArrayStreamReaderInit( struct ArrowArrayStream* out, struct ArrowIpcInputStream* input_stream, struct ArrowIpcArrayStreamReaderOptions* options); +/// \brief The magic which appears at the beginning and end of an IPC file, 0-padded. +#define NANOARROW_IPC_FILE_PADDED_MAGIC "ARROW1\0" Review Comment: Can this be kept private? I also find it a little odd that it's a string (would it get interpreted as a nul-terminated 6 byte string instead of an 8 byte magic header by accident?). (Fine as long as it works and is internal!) ########## src/nanoarrow/nanoarrow_ipc.h: ########## @@ -417,6 +417,36 @@ ArrowErrorCode ArrowIpcArrayStreamReaderInit( struct ArrowArrayStream* out, struct ArrowIpcInputStream* input_stream, struct ArrowIpcArrayStreamReaderOptions* options); +/// \brief The magic which appears at the beginning and end of an IPC file, 0-padded. +#define NANOARROW_IPC_FILE_PADDED_MAGIC "ARROW1\0" + +/// \brief Represents a byte range in an IPC file. Review Comment: Can these be moved to the end of the header, maybe outside the doxygen IPC group, and preceded by something along the lines of "internal API"? ########## src/nanoarrow/ipc/writer.c: ########## @@ -329,3 +362,60 @@ ArrowErrorCode ArrowIpcWriterWriteArrayStream(struct ArrowIpcWriter* writer, ArrowArrayViewReset(&array_view); return NANOARROW_OK; } + +ArrowErrorCode ArrowIpcWriterStartFile(struct ArrowIpcWriter* writer, + struct ArrowError* error) { Review Comment: This would also benefit from a "is it plugged in" test (until we have time to circle back and finalize the API) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
